( dataset_name, dataset_weight, output_path, seed)
| 86 | return all_lines |
| 87 | |
def create_dataset(dataset_name, dataset_weight, output_path, seed):
    """Build the weighted training data for one named dataset.

    The raw dataset is obtained via ``get_raw_dataset(dataset_name,
    output_path, seed)``; its ``get_train_data()`` result is then passed,
    together with ``dataset_weight``, to ``get_weight_data``.

    Args:
        dataset_name: Identifier forwarded to ``get_raw_dataset``.
        dataset_weight: Weight forwarded to ``get_weight_data``.
            NOTE(review): presumably a sampling/repeat factor -- confirm
            against ``get_weight_data``.
        output_path: Forwarded unchanged to ``get_raw_dataset``.
        seed: Forwarded unchanged to ``get_raw_dataset``.

    Returns:
        Whatever ``get_weight_data`` returns for the training data.
    """
    train_split = get_raw_dataset(dataset_name, output_path, seed).get_train_data()
    return get_weight_data(train_split, dataset_weight)
| 93 | |
| 94 | def process_concat_data(text, tokenizer, max_seq_len, args): |
| 95 | texts = text.split("<_end>") |
no test coverage detected