MCPcopy Index your code
hub / github.com/THUDM/GLM / encode

Method encode

tasks/superglue/dataset.py:770–803  ·  view source on GitHub ↗
(self, example: InputExample, tokenizer, seq_length, args)

Source from the content-addressed store, hash-verified

768 output.write(json.dumps(data) + "\n")
769
def encode(self, example: InputExample, tokenizer, seq_length, args):
    """Encode one example into a sample with one input sequence per candidate.

    For every answer in ``example.meta["candidates"]`` the second segment is
    ``text_b`` followed by the candidate's ids, and ``build_input_from_ids``
    turns ``text_a`` plus that segment into model inputs.

    Args:
        example: Provides ``text_a``, an optional ``text_b``, ``label``,
            ``guid`` and ``meta["candidates"]``.
        tokenizer: Object whose ``EncodeAsIds(text).tokenization`` yields the
            token ids (presumably a list of ints; confirm against the
            tokenizer).
        seq_length: Length limit used for the truncation counter and passed
            on to ``build_input_from_ids``.
        args: Namespace; ``args.pretrained_bert`` selects which per-candidate
            fields are collected.

    Returns:
        The result of ``build_sample``: ids with types and paddings when
        ``args.pretrained_bert`` is truthy, otherwise ids with positions and
        masks. The label is stored as its index in ``self.get_labels()``.

    Raises:
        ValueError: If ``example.label`` is not in ``self.get_labels()``.
    """
    use_bert = args.pretrained_bert
    ids_list = []
    # Only one pair of these is consumed, depending on ``use_bert``.
    types_list, paddings_list = [], []
    positions_list, sep_list = [], []

    tokens_a = tokenizer.EncodeAsIds(example.text_a).tokenization
    tokens_b = tokenizer.EncodeAsIds(example.text_b).tokenization if example.text_b else None

    for answer in example.meta["candidates"]:
        answer_ids = tokenizer.EncodeAsIds(answer).tokenization
        # Without text_b the second segment is just the candidate; the
        # previous code evaluated ``len(None)`` / ``None + answer_ids`` here
        # and raised TypeError.
        text_b_ids = answer_ids if tokens_b is None else tokens_b + answer_ids

        total_length = len(tokens_a) + len(text_b_ids)
        total_length += num_special_tokens_to_add(tokens_a, text_b_ids, None, add_cls=True, add_sep=True,
                                                  add_piece=False)
        if total_length > seq_length:
            # Counted once per candidate whose full length exceeds the limit.
            self.num_truncated += 1

        data = build_input_from_ids(tokens_a, text_b_ids, None, seq_length, tokenizer, args,
                                    add_cls=True, add_sep=True, add_piece=False)
        ids, types, paddings, position_ids, sep, target_ids, loss_masks = data

        ids_list.append(ids)
        if use_bert:
            types_list.append(types)
            paddings_list.append(paddings)
        else:
            positions_list.append(position_ids)
            sep_list.append(sep)

    label = self.get_labels().index(example.label)
    if use_bert:
        return build_sample(ids_list, label=label, types=types_list, paddings=paddings_list,
                            unique_id=example.guid)
    return build_sample(ids_list, positions=positions_list, masks=sep_list, label=label,
                        unique_id=example.guid)
804
805 @staticmethod
806 def _create_examples(path, set_type, seed=42, max_train_candidates_per_question: int = 10, for_train=False) -> List[

Callers

nothing calls this directly

Calls 6

get_labelsMethod · 0.95
build_input_from_idsFunction · 0.90
build_sampleFunction · 0.90
appendMethod · 0.80
EncodeAsIdsMethod · 0.45

Tested by

no test coverage detected