| 24 | |
| 25 | |
def load_file_metas(file_metas: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Flatten one or more parquet file lists into a single list of records.

    Each entry of ``file_metas`` describes one parquet file and names the
    columns to read from it:

    * ``"file_list"``: path to the parquet file.
    * ``"image_path_key"``: column copied into ``"image_path"``.
    * ``"short_prompt_key"``: column copied into ``"short_prompt"``.
    * ``"long_prompt_key"``: column copied into ``"long_prompt"``.

    Args:
        file_metas: Descriptions of the parquet files to load, in the order
            their rows should appear in the result.

    Returns:
        One dict per parquet row with the keys ``"image_path"``,
        ``"short_prompt"`` and ``"long_prompt"``. Values are passed through
        unchanged from the parquet columns (presumably strings, per the
        annotation -- verify against the data).

    Raises:
        AssertionError: If a ``"file_list"`` path does not end in
            ``.parquet`` (compared case-insensitively).
        KeyError: If a meta entry lacks one of the required keys.
    """
    files = []
    for file_meta in file_metas:
        file_list_path = file_meta["file_list"]
        image_path_key = file_meta["image_path_key"]
        short_prompt_key = file_meta["short_prompt_key"]
        long_prompt_key = file_meta["long_prompt_key"]

        ext = os.path.splitext(file_list_path)[1].lower()
        if ext != ".parquet":
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; the exception type is kept for
            # backward compatibility with existing callers.
            raise AssertionError(
                f"only support parquet format, got {file_list_path!r}"
            )

        df = pl.read_parquet(file_list_path)
        files.extend(
            {
                "image_path": row[image_path_key],
                "short_prompt": row[short_prompt_key],
                "long_prompt": row[long_prompt_key],
            }
            for row in df.iter_rows(named=True)
        )
    return files
| 45 | |
| 46 | |
| 47 | # https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/image_datasets.py |