(path_)
| 69 | # multilabel dataset support (only for csvs) |
| 70 | label_key = non_binary_cols |
def get_dataset_from_path(path_):
    """Build the dataset for a single path or named corpus.

    Settings such as ``lazy``, ``text_key``, ``label_key``, ``binarize_sent``,
    ``delim``, ``drop_unlabeled``, ``loose`` and ``process_fn`` are read from
    the enclosing scope.

    Args:
        path_: a dataset path, or a name accepted by ``supported_corpus``.

    Returns:
        The result of ``get_dataset`` when ``lazy`` is falsy, otherwise the
        result of ``lazy_array_loader`` for the resolved path.
    """
    if not lazy:
        # eager mode: process_fn is handed to get_dataset as preprocess_fn
        return get_dataset(
            path_,
            text_key=text_key,
            label_key=label_key,
            binarize_sent=binarize_sent,
            delim=delim,
            drop_unlabeled=drop_unlabeled,
            loose_json=loose,
            preprocess_fn=process_fn,
        )

    # lazy mode: remember what the caller asked for, since a named corpus
    # is handed to get_dataset by name while the lazy cache is keyed on the
    # corpus' registered PATH
    source = path_
    if supported_corpus(path_):
        path_ = corpora.NAMED_CORPORA[path_].PATH

    if not exists_lazy(path_, data_type='data'):
        # no lazy cache yet: load the dataset once (no preprocess_fn here)
        # and write its X attribute out via make_lazy
        loaded = get_dataset(
            source,
            text_key=text_key,
            label_key=label_key,
            binarize_sent=binarize_sent,
            delim=delim,
            drop_unlabeled=drop_unlabeled,
            loose_json=loose,
        )
        make_lazy(path_, loaded.X, data_type='data')

    # process_fn is supplied to the lazy loader as its map_fn
    return lazy_array_loader(path_, data_type='data', map_fn=process_fn)
| 90 | # get one or multiple datasets and concatenate |
| 91 | if isinstance(path, str): |
| 92 | path = [path] |
no test coverage detected