| 87 | |
| 88 | |
class EssayReviser(DataAugmenter):
    """Build "fix this essay" instruction prompts from deliberately corrupted essays.

    For every essay three corrupted variants are produced, each prefixed with
    the matching instruction:

    * structure errors - two paragraphs (separated by a blank line) trade places;
    * grammar errors   - random words are replaced by WordNet lemma names;
    * typing errors    - random characters are overwritten with ASCII letters.

    The original, uncorrupted essay is the expected answer for all of them.
    """

    # Chance, in percent, that any single word is replaced by a WordNet lemma.
    SYNONYM_SWAP_PERCENT = 30
    # One typo is injected per this many characters of essay text; lower the
    # value to corrupt the essay more heavily.
    CHARS_PER_TYPO = 60

    def __init__(self):
        # NOTE(review): the base-class initialiser is not invoked here; confirm
        # that DataAugmenter does not rely on it.
        # WordNet corpora are required by wordnet.synsets() in _swap_synonyms.
        nltk.download("wordnet")
        nltk.download("omw-1.4")

    def parse_single(self, essay):
        """Return corrupted-essay prompts together with the expected answers.

        Args:
            essay: The original essay text; paragraphs are expected to be
                separated by a blank line ("\\n\\n").

        Returns:
            A ``(instructions, answers)`` tuple of equally long lists.  Each
            instruction is a prompt followed by a corrupted copy of the essay;
            every answer is the untouched ``essay``.
        """
        instructions = [
            "Fix structure errors in this essay: " + self._swap_paragraphs(essay),
            "Fix grammar errors in this essay: " + self._swap_synonyms(essay),
            "Fix typing errors in this essay: " + self._inject_typos(essay),
        ]
        return instructions, [essay] * len(instructions)

    @staticmethod
    def _swap_paragraphs(essay):
        """Return the essay with two distinct paragraphs exchanged."""
        paragraphs = essay.split("\n\n")
        # Sampling without replacement guarantees two different positions, so
        # the swap always alters the order when there is something to swap.
        # A single-paragraph essay is returned unchanged.
        if len(paragraphs) >= 2:
            first, second = random.sample(range(len(paragraphs)), 2)
            paragraphs[first], paragraphs[second] = paragraphs[second], paragraphs[first]
        return "\n\n".join(paragraphs)

    def _swap_synonyms(self, essay):
        """Return the essay with random words replaced by WordNet lemma names.

        The text is split on whitespace and re-joined with single spaces, so
        the original paragraph breaks are not preserved in the result.
        """
        words = essay.split()
        for index, word in enumerate(words):
            if random.randrange(100) >= self.SYNONYM_SWAP_PERCENT:
                continue
            # Multi-word lemma names use underscores; turn them back into
            # spaces so the replacement reads as ordinary text.
            candidates = [
                lemma.name().replace("_", " ")
                for synset in wordnet.synsets(word)
                for lemma in synset.lemmas()
            ]
            # Drop the word itself, otherwise the "swap" could change nothing.
            candidates = [name for name in candidates if name.lower() != word.lower()]
            if candidates:
                words[index] = random.choice(candidates)
        return " ".join(words)

    def _inject_typos(self, essay):
        """Return the essay with random characters overwritten by ASCII letters.

        One typo is made per ``CHARS_PER_TYPO`` characters, with a minimum of
        one for any non-empty essay so the prompt always contains an error.
        """
        if not essay:
            return essay
        chars = list(essay)
        typo_count = max(1, len(chars) // self.CHARS_PER_TYPO)
        for _ in range(typo_count):
            # randrange keeps the position inside the text; typos accumulate
            # in ``chars`` instead of restarting from the clean essay each time.
            position = random.randrange(len(chars))
            replacements = [
                letter for letter in string.ascii_letters if letter != chars[position]
            ]
            chars[position] = random.choice(replacements)
        return "".join(chars)
| 133 | |
| 134 | |
| 135 | class StackExchangeBuilder(DataAugmenter): |