| 437 | |
| 438 | |
| 439 | def build_artifact_index( |
| 440 | run_dir: Path, |
| 441 | patterns: list[str] | None = None, |
| 442 | extra_roots: dict[str, Path] | None = None, |
| 443 | ) -> dict[str, Any]: |
| 444 | patterns = patterns or [ |
| 445 | "config*", |
| 446 | "commands.sh", |
| 447 | "run_manifest.json", |
| 448 | "summary.md", |
| 449 | "artifact_index.json", |
| 450 | "validation/**/*", |
| 451 | "logs/**/*", |
| 452 | "versions/**/*", |
| 453 | "workflow/**/*", |
| 454 | "fastqc/**/*", |
| 455 | "multiqc/**/*", |
| 456 | "rnaseq_salmon/**/*", |
| 457 | "qc/**/*", |
| 458 | "results/**/*", |
| 459 | "plots/**/*", |
| 460 | "visualizations/**/*", |
| 461 | "tables/**/*", |
| 462 | "notebooks/**/*", |
| 463 | "variants/**/*", |
| 464 | "alignment/**/*", |
| 465 | "peaks/**/*", |
| 466 | "tracks/**/*", |
| 467 | "motifs/**/*", |
| 468 | "consensus/**/*", |
| 469 | "f1r2/**/*", |
| 470 | "functional_profile/**/*", |
| 471 | "taxonomic_classification/**/*", |
| 472 | "bcl/**/*", |
| 473 | "demux/**/*", |
| 474 | "methods/**/*", |
| 475 | "manifest/**/*", |
| 476 | "resources/**/*", |
| 477 | "*.json", |
| 478 | ] |
| 479 | artifacts = [] |
| 480 | seen: set[Path] = set() |
| 481 | |
| 482 | def collect(root_label: str | None, root: Path, root_patterns: list[str]) -> None: |
| 483 | prefix = "" if not root_label else f"{root_label}/" |
| 484 | for pattern in root_patterns: |
| 485 | for path in root.glob(pattern): |
| 486 | if path.is_file() and path not in seen: |
| 487 | seen.add(path) |
| 488 | artifacts.append( |
| 489 | { |
| 490 | "path": f"{prefix}{path.relative_to(root)}", |
| 491 | "bytes": path.stat().st_size, |
| 492 | "modified_at": datetime.fromtimestamp(path.stat().st_mtime) |
| 493 | .astimezone() |
| 494 | .isoformat(timespec="seconds"), |
| 495 | "sha256": sha256_file(path) |
| 496 | if path.stat().st_size <= MAX_AUTO_CHECKSUM_BYTES |