(row)
| 102 | if dataset_type == 'TokenizedDataset': |
| 103 | # already tokenized when saved |
def process_fn(row):
    """Turn one stored row into a ``text`` / ``loss_mask`` pair.

    The row is flattened and handed to ``pad_to_len``, which returns the
    sequence to emit plus a separator index.  The mask holds ones for the
    first ``sep`` positions and zeros for the remaining ones.

    NOTE(review): presumably ``sep`` marks where real tokens end and
    padding begins -- confirm against ``pad_to_len``.
    """
    tokens, sep = pad_to_len(row.flatten())
    # Positions from ``sep`` to the end of the sequence get a zero.
    trailing = len(tokens) - sep
    loss_mask = np.array([1] * sep + [0] * trailing)
    return {'text': tokens, 'loss_mask': loss_mask}
| 109 | |
| 110 | elif dataset_type == 'TextCodeDataset': |
| 111 | def process_fn(row): |
no test coverage detected