Class CodeInstructor

scripts/data_augment/data_augment.py:394–423 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

392
393
class CodeInstructor(DataAugmenter):
    """Build (instruction, code) pairs by summarizing code with a T5 model.

    Each code snippet is summarized by the model; the summary is turned into
    a "write a script that does ..." instruction, and the original snippet
    is used as the answer to that instruction.
    """

    def __init__(self, model_name="Graverman/t5-code-summary"):
        """Load the tokenizer and the generation model from one checkpoint.

        Args:
            model_name: Identifier handed to ``from_pretrained`` for both the
                tokenizer and the model, so the two cannot drift apart.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name)

    def parse(self, codes):
        """Generate one instruction per code snippet.

        Args:
            codes: The code snippets to summarize; passed to the tokenizer
                as-is and returned unchanged as the answers.

        Returns:
            A ``(questions, answers)`` tuple. ``questions`` is a list of
            instruction strings built from the generated summaries and
            ``answers`` is the ``codes`` argument itself.
        """
        # Nothing to summarize: skip the tokenizer/model round trip, which
        # has no sequences to work on for an empty batch.
        if isinstance(codes, (list, tuple)) and not codes:
            return [], codes

        # Every snippet is padded or truncated to exactly 300 tokens so the
        # batch can be returned as a single PyTorch tensor.
        source_encoding = self.tokenizer(
            codes,
            max_length=300,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            add_special_tokens=True,
            return_tensors="pt",
        )
        # NOTE(review): length_penalty and early_stopping look like
        # beam-search options, yet num_beams is not set here -- confirm
        # against the transformers generation docs whether they take effect.
        outputs = self.model.generate(
            input_ids=source_encoding["input_ids"],
            attention_mask=source_encoding["attention_mask"],
            max_length=100,
            length_penalty=0.75,
            repetition_penalty=2.5,
            early_stopping=True,
            use_cache=True,
        )
        summaries = [
            self.tokenizer.decode(output, skip_special_tokens=True)
            for output in outputs
        ]

        prompt_prefix = "Write a script that does the following:\n"
        questions = [prompt_prefix + summary for summary in summaries]
        answers = codes

        return questions, answers
424
425
426	def recognize_entities(text, model, n=4, person="ignore"):

get_augmenterFunction · 0.85

no outgoing calls

no test coverage detected