Default data parser for CSVDataset. It 1. ignores any column that does not have a header. 2. tries to convert to a list of numeric values (generated by np.array().tolist()) if the cell data is a str separated by ','. 3. reads the data and infers the data type directly, otherwise.
| 365 | |
| 366 | |
class DefaultDataParser:
    """Default data parser for CSVDataset.

    It
    1. ignores any column that does not have a header.
    2. tries to convert to a list of numeric values (generated by
       np.array().tolist()) if the cell data is a str separated by ','.
    3. reads the data and infers the data type directly, otherwise.
    """

    def __call__(self, df: pd.DataFrame):
        """Convert each named column of ``df`` into a numpy array.

        Parameters
        ----------
        df : pd.DataFrame
            Table to parse. Columns whose header contains ``"Unnamed"`` are
            skipped with a warning.

        Returns
        -------
        dict
            Maps every kept column header to a numpy array with at least
            one dimension. Columns whose first cell is a ``str`` are parsed
            cell by cell with ``ast.literal_eval`` before being stacked into
            an array; any error raised by that parsing propagates to the
            caller.
        """
        data = {}
        for header in df:
            if "Unnamed" in header:
                dgl_warning("Unnamed column is found. Ignored...")
                continue
            # squeeze() collapses a one-row column into a 0-d array, on which
            # len() and indexing both fail; atleast_1d restores the row axis
            # so a single-row table is parsed like any other.
            dt = np.atleast_1d(df[header].to_numpy().squeeze())
            if len(dt) > 0 and isinstance(dt[0], str):
                # probably consists of list of numeric values
                dt = np.array([ast.literal_eval(row) for row in dt])
            data[header] = dt
        return data
no outgoing calls