gets dataset object based on keyword args and file at `path`
(path, **kwargs)
| 42 | return os.path.splitext(path)[1] |
| 43 | |
def get_dataset(path, **kwargs):
    """Build the dataset for `path`, forwarding `**kwargs` to its constructor.

    A `path` accepted by `supported_corpus` is looked up in
    `corpora.NAMED_CORPORA` and that entry is called with `**kwargs`.
    Otherwise the extension reported by `get_ext(path)` picks the loader:
    any extension containing '.json' goes to `json_dataset`, while '.csv'
    and '.tsv' go to `csv_dataset`.

    Raises:
        NotImplementedError: if the extension matches none of the above.
    """
    # Named corpora take priority over extension-based dispatch.
    if supported_corpus(path):
        corpus_factory = corpora.NAMED_CORPORA[path]
        return corpus_factory(**kwargs)

    extension = get_ext(path)
    # Substring match on purpose: any extension containing '.json' qualifies.
    if '.json' in extension:
        return json_dataset(path, **kwargs)
    if extension in ('.csv', '.tsv'):
        return csv_dataset(path, **kwargs)
    raise NotImplementedError('data file type %s is not supported' % (extension))
| 56 | |
| 57 | def supported_corpus(corpus_name): |
| 58 | """checks if corpus name is defined in `corpora.py`""" |
no test coverage detected