(f, fmt=None)
| 160 | |
| 161 | |
def load(f, fmt=None):
    """Load a data file, choosing the parser from ``fmt`` or the file suffix.

    Args:
        f: Path to a local file, or a URL. When ``validators.url(f)`` is
            truthy the file is fetched with ``download_file`` into
            ``<LMUDataRoot()>/files/<basename of f>`` (skipped if that path
            already exists) and loaded from there.
        fmt: Optional handler key: ``pkl``, ``json``, ``jsonl``, ``xlsx``,
            ``csv`` or ``tsv``. When ``None`` the text after the last ``.``
            in ``f`` is used instead.

    Returns:
        The unpickled object for ``pkl``; the decoded JSON value for
        ``json``; a list with one decoded value per non-blank line for
        ``jsonl``; the result of ``pd.read_excel`` / ``pd.read_csv`` for
        ``xlsx`` / ``csv`` / ``tsv``.

    Raises:
        KeyError: If neither ``fmt`` nor the suffix names a known handler.
    """

    def load_pkl(pth):
        # NOTE(security): unpickling can execute arbitrary code. Only use
        # this on trusted files -- that includes files fetched from a URL.
        with open(pth, 'rb') as fin:
            return pickle.load(fin)

    def load_json(pth):
        with open(pth, 'r', encoding='utf-8') as fin:
            return json.load(fin)

    def load_jsonl(pth):
        # Skip every blank line (not just a single trailing one) so that an
        # empty file yields [] and stray blank lines do not break parsing.
        with open(pth, encoding='utf-8') as fin:
            stripped = (line.strip() for line in fin)
            return [json.loads(line) for line in stripped if line]

    def load_xlsx(pth):
        return pd.read_excel(pth)

    def load_csv(pth):
        return pd.read_csv(pth)

    def load_tsv(pth):
        return pd.read_csv(pth, sep='\t')

    import validators
    if validators.url(f):
        # Remote source: download once, then reuse the local copy.
        tgt = osp.join(LMUDataRoot(), 'files', osp.basename(f))
        if not osp.exists(tgt):
            download_file(f, tgt)
        f = tgt

    handlers = dict(
        pkl=load_pkl,
        json=load_json,
        jsonl=load_jsonl,
        xlsx=load_xlsx,
        csv=load_csv,
        tsv=load_tsv,
    )

    # An explicit fmt wins; otherwise fall back to the file suffix.
    key = fmt if fmt is not None else f.split('.')[-1]
    if key not in handlers:
        raise KeyError(
            f'Unsupported file format {key!r}; '
            f'expected one of {sorted(handlers)}'
        )
    return handlers[key](f)
| 199 | |
| 200 | |
| 201 | def download_file(url, filename=None): |
no test coverage detected