(text)
| 409 | |
| 410 | |
def normalize_quote(text):
    """Re-quote every quoted span of ``text`` sentence by sentence.

    Each match of the ``quote_checker`` pattern (defined elsewhere in this
    module) has its first and last character dropped, is split into
    sentences with NLTK's ``sent_tokenize``, and every resulting sentence is
    wrapped in single quotes. The re-quoted sentences are joined with a
    single space and substituted back in place of the match.
    """

    def _requote(match):
        # Imported lazily: NLTK doesn't get along with multiprocessing.
        from nltk import sent_tokenize

        # Drop the first and last character of the match (the quote marks).
        inner = match.group()[1:-1]
        wrap = "'{}'".format
        return " ".join(map(wrap, sent_tokenize(inner)))

    return re.sub(quote_checker, _requote, text)
| 422 | |
| 423 | |
# Regex for a number: group 1 captures an optional sign, a leading digit and
# any further digits/commas; it may be followed by an optional "." and any
# trailing digits. Written as a raw string so the backslashes reach the regex
# engine untouched instead of being parsed as (invalid) string escapes.
number_checker = r"([+-]?\d[\d,]*)[\.]?\d*"