Scan local files under raw_root to build download metadata. Walks through raw_root to find downloaded 3D files (.glb, .obj, .fbx, .usdz, .gltf, .zip), matches them against global_metadata via file_identifier (uid extracted from URL) to recover the sha256 -> local_path mapping.
(raw_root, global_metadata)
| 52 | |
| 53 | |
def build_downloaded_metadata_from_files(raw_root, global_metadata):
    """Scan local files under raw_root to build download metadata.

    Walks through raw_root to find downloaded 3D files (.glb, .obj, .fbx,
    .usdz, .gltf, .zip) and matches each file's stem against the last
    '/'-separated component of ``file_identifier`` in global_metadata to
    recover the sha256 -> local_path mapping.

    Args:
        raw_root: Directory that is walked recursively. A trailing path
            separator is tolerated.
        global_metadata: DataFrame with a 'sha256' column and, optionally,
            a 'file_identifier' column. Without 'file_identifier' nothing
            can be matched and None is returned.

    Returns:
        A DataFrame indexed by 'sha256' with a 'local_path' column holding
        paths relative to the parent directory of raw_root, or None when no
        file could be matched.

    Side effects:
        When at least one file matched, writes the result to
        ``<raw_root>/metadata.csv`` and prints a one-line summary.
    """
    extensions = ('.glb', '.obj', '.fbx', '.usdz', '.gltf', '.zip')

    # Build uid -> sha256 mapping from global metadata. Iterating the two
    # columns directly avoids the per-row Series that iterrows() creates.
    uid_to_sha256 = {}
    if 'file_identifier' in global_metadata.columns:
        uid_to_sha256 = {
            str(identifier).split('/')[-1]: sha256
            for identifier, sha256 in zip(
                global_metadata['file_identifier'], global_metadata['sha256']
            )
        }

    # Paths are stored relative to the parent of raw_root (i.e. download_root).
    # abspath() normalizes away a trailing separator first; a plain
    # dirname('data/raw/') would return 'data/raw' itself, not its parent.
    download_root = os.path.dirname(os.path.abspath(raw_root))

    # Scan files
    records = []
    for dirpath, _, filenames in os.walk(raw_root):
        for fname in filenames:
            if not fname.lower().endswith(extensions):
                continue
            sha256 = uid_to_sha256.get(os.path.splitext(fname)[0])
            if sha256 is None:
                continue
            full_path = os.path.join(dirpath, fname)
            records.append({
                'sha256': sha256,
                'local_path': os.path.relpath(full_path, download_root),
            })

    if not records:
        return None

    df = pd.DataFrame(records).set_index('sha256')
    print(f' [from_file] Found {len(df)} downloaded files under {raw_root}')

    # Save as metadata.csv under raw_root
    os.makedirs(raw_root, exist_ok=True)
    df.to_csv(os.path.join(raw_root, 'metadata.csv'))
    return df
| 94 | |
| 95 | |
| 96 | # Check if directory is a multi-view directory (ending with _view or _view_fix) |