MCPcopy Index your code
hub / github.com/idank/explainshell / load_metadata

Function load_metadata

tools/fetch_manned.py:356–416  ·  view source on GitHub ↗

Parse all metadata files. Returns parsed data structures.

(data_dir)

Source from the content-addressed store, hash-verified

354
355
356def load_metadata(data_dir):
357 """Parse all metadata files. Returns parsed data structures."""
358 # Parse systems: id -> (name, release, short)
359 systems = {}
360 for row in parse_tsv(os.path.join(data_dir, "systems.tsv")):
361 sys_id = int(row[0])
362 name = row[1]
363 release = row[2] if len(row) > 2 else ""
364 short = row[3] if len(row) > 3 else ""
365 # Normalize PostgreSQL COPY NULL marker
366 if release == "\\N":
367 release = ""
368 if short == "\\N":
369 short = ""
370 systems[sys_id] = (name, release, short)
371 logger.info("Loaded %d systems", len(systems))
372
373 # Parse locales: id -> locale
374 locales = {}
375 for row in parse_tsv(os.path.join(data_dir, "locales.tsv")):
376 loc_id = int(row[0])
377 locale = row[1]
378 locales[loc_id] = locale
379 logger.info("Loaded %d locales", len(locales))
380
381 # Determine English locale IDs
382 english_locale_ids = set()
383 for loc_id, locale in locales.items():
384 if locale == "" or locale.startswith("en"):
385 english_locale_ids.add(loc_id)
386 logger.info("English locale IDs: %s", english_locale_ids)
387
388 # Parse mans: id -> (name, section)
389 mans = {}
390 for row in parse_tsv(os.path.join(data_dir, "mans.tsv")):
391 man_id = int(row[0])
392 name, section = row[1], row[2]
393 mans[man_id] = (name, section)
394 logger.info("Loaded %d man page entries", len(mans))
395
396 # Parse packages: id -> system_id
397 packages = {}
398 for row in parse_tsv(os.path.join(data_dir, "packages.tsv")):
399 pkg_id = int(row[0])
400 system = int(row[1])
401 packages[pkg_id] = system
402 logger.info("Loaded %d packages", len(packages))
403
404 # Parse package_versions: id -> (package_id, released_tuple)
405 # `released` (column 4) is a YYYY-MM-DD date that feeds the
406 # manned-style selector ranking — within a package we prefer the
407 # pkgver with the latest release date.
408 pkg_versions: dict[int, tuple[int, tuple[int, int, int]]] = {}
409 for row in parse_tsv(os.path.join(data_dir, "package_versions.tsv")):
410 pv_id = int(row[0])
411 pkg_id = int(row[1])
412 released = _parse_date(row[3]) if len(row) > 3 else (0, 0, 0)
413 pkg_versions[pv_id] = (pkg_id, released)

Callers 1

cmd_extract · Function · 0.85

Calls 3

parse_tsv · Function · 0.85
_parse_date · Function · 0.85
add · Method · 0.80

Tested by

no test coverage detected