MCPcopy Index your code
hub / github.com/LAION-AI/Open-Assistant / EssayReviser

Class EssayReviser

scripts/data_augment/data_augment.py:89–132  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

87
88
class EssayReviser(DataAugmenter):
    """Build "fix this essay" prompts by deliberately corrupting clean essays.

    Each essay yields three prompts (shuffled paragraphs, substituted words,
    injected typos); the target for every prompt is the original essay.
    """

    # One typo is injected per this many characters of essay text; lower the
    # value to corrupt the essay more heavily.
    CHARS_PER_TYPO = 60

    def __init__(self):
        # WordNet corpora are required by the synonym lookup in
        # _substitute_synonyms.
        # NOTE(review): the base-class __init__ is not called here, matching
        # the original code -- confirm that is intended.
        nltk.download("wordnet")
        nltk.download("omw-1.4")

    def parse_single(self, essay):
        """Create corrupted variants of ``essay`` paired with the original.

        Args:
            essay: Clean essay text. Paragraphs are expected to be separated
                by blank lines (``"\\n\\n"``).

        Returns:
            A tuple ``(instructions, targets)`` of two equal-length lists.
            ``instructions`` holds one prompt per corruption type (structure,
            grammar, typing, in that order) and ``targets`` repeats ``essay``
            once per prompt.
        """
        instructions = [
            "Fix structure errors in this essay: " + self._swap_paragraphs(essay),
            "Fix grammar errors in this essay: " + self._substitute_synonyms(essay),
            "Fix typing errors in this essay: " + self._inject_typos(essay, self.CHARS_PER_TYPO),
        ]
        return instructions, [essay] * len(instructions)

    @staticmethod
    def _swap_paragraphs(essay):
        """Return ``essay`` with two distinct, randomly chosen paragraphs swapped."""
        paragraphs = essay.split("\n\n")
        # Sampling two *different* positions guarantees the order really
        # changes; a single-paragraph essay has nothing to swap and is
        # returned as-is.
        if len(paragraphs) >= 2:
            first, second = random.sample(range(len(paragraphs)), 2)
            paragraphs[first], paragraphs[second] = paragraphs[second], paragraphs[first]
        return "\n\n".join(paragraphs)

    @staticmethod
    def _substitute_synonyms(essay):
        """Return ``essay`` with a random subset of words replaced by WordNet lemmas."""
        words = essay.split()
        for index, word in enumerate(words):
            # Same selection rule as before: 30 of the 101 possible draws.
            if random.randint(0, 100) < 30:
                candidates = [lemma.name() for synset in wordnet.synsets(word) for lemma in synset.lemmas()]
                # Words WordNet does not know are left untouched.
                if candidates:
                    words[index] = random.choice(candidates)
        # Joining on single spaces collapses the original whitespace,
        # including paragraph breaks.
        return " ".join(words)

    @staticmethod
    def _inject_typos(essay, chars_per_typo=60):
        """Return ``essay`` with one random letter substituted per ``chars_per_typo`` characters."""
        chars = list(essay)
        # Mutate one shared buffer so every typo accumulates; essays shorter
        # than ``chars_per_typo`` get zero iterations and come back unchanged.
        for _ in range(len(chars) // chars_per_typo):
            # randrange excludes len(chars), so a character is always
            # replaced in place and the essay length never changes.
            chars[random.randrange(len(chars))] = random.choice(string.ascii_letters)
        return "".join(chars)
133
134
135class StackExchangeBuilder(DataAugmenter):

Callers 1

get_augmenterFunction · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected