MCPcopy Index your code
hub / github.com/THUDM/GLM / encode

Method encode

tasks/superglue/dataset.py:550–587  ·  view source on GitHub ↗
(self, example: InputExample, tokenizer, seq_length, args)

Source from the content-addressed store, hash-verified

548 return [0, 1]
549
def encode(self, example: InputExample, tokenizer, seq_length, args):
    """Encode a two-choice example as a single multi-choice sample.

    The premise ``example.text_a`` is punctuation-standardized and suffixed
    with a connective: ``'because'`` when ``example.meta['question']`` is
    ``'cause'``, ``'so'`` otherwise. It is then paired in turn with
    ``example.meta['choice1']`` and ``example.meta['choice2']``, and each
    pair is passed to ``build_input_from_ids``.

    Args:
        example: Example providing ``text_a``, ``meta`` (keys ``question``,
            ``choice1``, ``choice2``), ``label`` and ``guid``.
        tokenizer: Object whose ``EncodeAsIds(text).tokenization`` yields the
            token ids for ``text``.
        seq_length: Length budget forwarded to ``build_input_from_ids``.
        args: Run configuration; ``args.pretrained_bert`` selects which
            per-choice fields are handed to ``build_sample``.

    Returns:
        Whatever ``build_sample`` returns for the two encoded choices.

    Side effects:
        Increments ``self.num_truncated`` once for every choice whose token
        count, including special tokens, exceeds ``seq_length``.
    """
    use_bert = args.pretrained_bert

    connective = 'because' if example.meta['question'] == 'cause' else 'so'
    premise = punctuation_standardization(example.text_a) + " " + connective
    premise_ids = tokenizer.EncodeAsIds(premise).tokenization

    # Look up both choices before encoding either one.
    candidates = (example.meta["choice1"], example.meta["choice2"])

    rows = []
    for candidate in candidates:
        candidate_ids = tokenizer.EncodeAsIds(
            punctuation_standardization(candidate)).tokenization

        special_count = num_special_tokens_to_add(
            premise_ids, candidate_ids, None,
            add_cls=True, add_sep=True, add_piece=False)
        # Only counts over-length pairs; presumably build_input_from_ids
        # does the actual truncation -- TODO confirm.
        if len(premise_ids) + len(candidate_ids) + special_count > seq_length:
            self.num_truncated += 1

        # The last two outputs (target ids and loss masks) are not used here.
        ids, types, paddings, position_ids, sep, _target_ids, _loss_masks = \
            build_input_from_ids(
                premise_ids, candidate_ids, None, seq_length, tokenizer, args,
                add_cls=True, add_sep=True, add_piece=False)
        rows.append((ids, types, paddings, position_ids, sep))

    # Transpose the per-choice rows into one list per field.
    ids_list, types_list, paddings_list, positions_list, sep_list = (
        list(column) for column in zip(*rows))

    # Unlabeled examples fall back to index 0.
    if example.label is None:
        label = 0
    else:
        label = self.get_labels().index(example.label)

    if use_bert:
        return build_sample(ids_list, label=label, types=types_list,
                            paddings=paddings_list, unique_id=example.guid)
    return build_sample(ids_list, positions=positions_list, masks=sep_list,
                        label=label, unique_id=example.guid)
588
589 @staticmethod
590 def _create_examples(path: str, set_type: str) -> List[InputExample]:

Callers

nothing calls this directly

Calls 7

get_labelsMethod · 0.95
build_input_from_idsFunction · 0.90
build_sampleFunction · 0.90
appendMethod · 0.80
EncodeAsIdsMethod · 0.45

Tested by

no test coverage detected