MCPcopy
hub / github.com/TencentARC/Pixal3D / build_downloaded_metadata_from_files

Function build_downloaded_metadata_from_files

data_toolkit/build_metadata.py:54–93  ·  view source on GitHub ↗

Scan local files under raw_root to build download metadata. Walks through raw_root to find downloaded 3D files (.glb, .obj, .fbx, .usdz, .gltf, .zip), matches them against global_metadata via file_identifier (uid extracted from URL) to recover the sha256 -> local_path mapping.

(raw_root, global_metadata)

Source from the content-addressed store, hash-verified

52
53
def build_downloaded_metadata_from_files(raw_root, global_metadata):
    """Scan local files under raw_root to build download metadata.

    Walks through raw_root to find downloaded 3D files (.glb, .obj, .fbx, .usdz, .gltf, .zip),
    matches them against global_metadata via file_identifier (uid extracted from URL) to recover
    the sha256 -> local_path mapping.

    Args:
        raw_root: Directory that is walked recursively. A trailing path separator is
            tolerated. The resulting metadata.csv is written directly inside it.
        global_metadata: DataFrame. When it has a 'file_identifier' column, the last
            '/'-separated component of each value is used as the uid and paired with
            the value of the 'sha256' column on the same row. Without that column no
            file can be matched.

    Returns:
        A DataFrame indexed by 'sha256' with a single 'local_path' column (paths are
        relative to the parent directory of raw_root), or None when no file under
        raw_root matched an entry of global_metadata.
    """
    extensions = ('.glb', '.obj', '.fbx', '.usdz', '.gltf', '.zip')

    # Build uid -> sha256 mapping from global metadata. Iterating the two columns
    # directly avoids materialising a Series per row; later rows still overwrite
    # earlier ones that share a uid.
    uid_to_sha256 = {}
    if 'file_identifier' in global_metadata.columns:
        for identifier, sha256 in zip(global_metadata['file_identifier'],
                                      global_metadata['sha256']):
            uid_to_sha256[str(identifier).split('/')[-1]] = sha256

    # Parent of raw_root (i.e. download_root). abspath() normalises the path first,
    # so a trailing separator ("downloads/raw/") no longer makes dirname() return
    # raw_root itself, which would silently drop the raw directory from local_path.
    download_root = os.path.dirname(os.path.abspath(raw_root))

    # Scan files
    records = []
    for dirpath, _dirnames, filenames in os.walk(raw_root):
        for fname in filenames:
            # Extension match is case-insensitive; the uid keeps the file's own casing.
            if not fname.lower().endswith(extensions):
                continue
            uid = os.path.splitext(fname)[0]
            sha256 = uid_to_sha256.get(uid)
            if sha256 is None:
                continue
            full_path = os.path.join(dirpath, fname)
            rel_path = os.path.relpath(full_path, download_root)
            records.append({'sha256': sha256, 'local_path': rel_path})

    if not records:
        return None

    df = pd.DataFrame(records).set_index('sha256')
    print(f' [from_file] Found {len(df)} downloaded files under {raw_root}')

    # Save as metadata.csv under raw_root
    os.makedirs(raw_root, exist_ok=True)
    df.to_csv(os.path.join(raw_root, 'metadata.csv'))
    return df
94
95
96# Check if directory is a multi-view directory (ending with _view or _view_fix)

Callers 1

build_metadata.pyFile · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected