(examples: Dict, tokenizer, text_column: str, max_length: int)
| 103 | |
| 104 | |
def tokenize_fn(examples: Dict, tokenizer, text_column: str, max_length: int):
    """Tokenize the usable text entries of one batch.

    Entries of ``examples[text_column]`` that are not ``str`` instances, or
    that are empty once surrounding whitespace is stripped, are dropped
    before the tokenizer is called. The tokenizer therefore may receive
    fewer items than the incoming batch holds.

    Args:
        examples: Mapping that is indexed with ``text_column`` to obtain an
            iterable of candidate texts.
        tokenizer: Callable invoked as
            ``tokenizer(texts, truncation=True, padding=False,
            max_length=max_length)``.
        text_column: Key of the text entries inside ``examples``.
        max_length: Forwarded unchanged to the tokenizer as ``max_length``.

    Returns:
        Whatever ``tokenizer`` returns for the filtered list of texts.
    """
    usable_texts = []
    for candidate in examples[text_column]:
        # Skip anything that is not text at all (None, numbers, ...).
        if not isinstance(candidate, str):
            continue
        # Skip strings that carry nothing but whitespace.
        if not candidate.strip():
            continue
        # Keep the original string; stripping is only used for the check.
        usable_texts.append(candidate)

    return tokenizer(
        usable_texts,
        truncation=True,
        padding=False,
        max_length=max_length,
    )
| 109 | |
| 110 | |
| 111 | class RandomTokenDataset(torch.utils.data.Dataset): |
no outgoing calls
no test coverage detected
searching dependent graphs…