| 67 | |
| 68 | |
def dfs_linearize_tokenize(sample: dict, tokenizer: PENMANBartTokenizer, remove_space=False, text_key='snt') -> dict:
    """Attach linearized-graph and text token fields to ``sample`` in place.

    When ``sample`` carries a truthy ``'amr'`` entry, it is passed to
    ``tokenizer.linearize`` and the sentence is read from the graph's
    ``metadata[text_key]``. Otherwise the sentence is read from
    ``sample['text']``.

    Args:
        sample: Mutable mapping holding either an ``'amr'`` graph or a
            ``'text'`` string. It is modified and returned.
        tokenizer: Object providing ``linearize(graph)``, which must return a
            pair ``(ids, extras)`` where ``extras['linearized_graphs']``
            exists, and ``encode(text)``.
        remove_space: If true, every whitespace run is stripped from the
            sentence before it is stored and encoded.
        text_key: Metadata key under which the graph stores its sentence.

    Returns:
        The same ``sample`` object with ``'text'`` and ``'text_token_ids'``
        set, plus ``'graph_tokens'`` and ``'graph_token_ids'`` when an AMR
        graph was present.
    """
    graph = sample.get('amr')
    if not graph:
        # No graph available: fall back to the raw text already in the sample.
        sentence = sample['text']
    else:
        token_ids, extras = tokenizer.linearize(graph)
        sample['graph_tokens'] = extras['linearized_graphs']
        sample['graph_token_ids'] = token_ids
        sentence = graph.metadata[text_key]

    # str.split() with no argument splits on any whitespace run, so joining
    # the pieces back with '' drops spaces, tabs and newlines alike.
    sentence = ''.join(sentence.split()) if remove_space else sentence

    sample['text'] = sentence
    sample['text_token_ids'] = tokenizer.encode(sentence)
    return sample
| 83 | |
| 84 | |
| 85 | def dfs_linearize_levi(sample: dict, tokenizer: PENMANBartTokenizer, remove_space=False) -> dict: |