Build an input sequence from two segments: the dialogue history and the last reply
(history, reply, tokenizer, with_eos=True)
| 60 | |
| 61 | |
def build_input_from_segments(history, reply, tokenizer, with_eos=True):
    """Assemble model inputs from the dialogue history and the last reply.

    The special-token ids are looked up through
    ``tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS)`` and unpacked, in
    order, as bos, eos, pad, speaker1 and speaker2.

    Args:
        history: list of earlier utterances, each a list of token ids.
        reply: list of token ids for the last utterance.
        tokenizer: object exposing ``convert_tokens_to_ids``.
        with_eos: when true, the eos id is appended to ``reply``.

    Returns:
        A tuple ``(instance, sequence)``. ``sequence`` is the list of
        segments: ``[bos]`` followed by every utterance prefixed with its
        speaker id. ``instance`` is a dict with ``"input_ids"`` (the
        flattened ``sequence``) and ``"token_type_ids"`` (one id per input
        token).
    """
    # The pad id is not used here; the five-way unpack still requires
    # SPECIAL_TOKENS to yield exactly five ids.
    bos, eos, _pad, speaker1, speaker2 = tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS)

    closing = [eos] if with_eos else []
    utterances = history + [reply + closing]

    sequence = [[bos]]
    # The leading bos token uses the bos id itself as its token type.
    token_types = [bos]
    for position, utterance in enumerate(utterances):
        # Speakers alternate by position, starting with speaker1 for the
        # first utterance of the history.
        speaker = speaker2 if position % 2 else speaker1
        segment = [speaker] + utterance
        sequence.append(segment)
        # Every token of the segment, speaker marker included, is typed
        # with that segment's speaker id.
        token_types.extend([speaker] * len(segment))

    instance = {
        "input_ids": list(chain.from_iterable(sequence)),
        "token_type_ids": token_types,
    }
    return instance, sequence
| 72 | |
| 73 | |
| 74 | def test_data(args): |