(cls, config, tokenizer, **kwargs)
| 33 | |
@classmethod
def load_dataset(cls, config, tokenizer, **kwargs):
    """Construct the dataset selected by ``config.type``.

    The incoming ``config`` is first passed through
    ``cls.get_default_config`` and the result is used for everything
    below (presumably this fills in defaults -- confirm against that
    method).

    Supported values of ``config.type``:

    * ``'huggingface'`` -> ``HuggingfaceDataset`` built from
      ``config.huggingface_dataset`` and a ``TextProcessor``.
    * ``'json'`` -> ``JsonDataset`` built from ``config.json_dataset``
      and a ``TextProcessor``.
    * ``'json_vision'`` -> ``JsonVisionDataset`` built from
      ``config.json_vision_dataset`` and a ``VisionTextProcessor``.

    Args:
        config: Dataset configuration; handed to ``get_default_config``.
        tokenizer: Passed to both the processor and the dataset.
        **kwargs: Forwarded unchanged to the dataset constructor.

    Returns:
        The newly constructed dataset object.

    Raises:
        ValueError: If ``config.type`` is not one of the values above.
    """
    config = cls.get_default_config(config)
    kind = config.type

    # Both text-only dataset kinds share the same processor construction.
    if kind in ('huggingface', 'json'):
        processor = TextProcessor(config.text_processor, tokenizer)
        if kind == 'huggingface':
            return HuggingfaceDataset(
                config.huggingface_dataset, tokenizer, processor, **kwargs
            )
        return JsonDataset(config.json_dataset, tokenizer, processor, **kwargs)

    if kind == 'json_vision':
        processor = VisionTextProcessor(config.vision_text_processor, tokenizer)
        return JsonVisionDataset(
            config.json_vision_dataset, tokenizer, processor, **kwargs
        )

    raise ValueError(f'Unknown dataset type: {kind}')
| 50 | |
def __init__(self):
    """Refuse instantiation.

    Raises:
        ValueError: Always; the message states that DatasetFactory is a
            static class and should not be instantiated.
    """
    message = 'DatasetFactory is a static class and should not be instantiated.'
    raise ValueError(message)
no test coverage detected