MCPcopy
hub / github.com/openai/plugins / validate_inputs

Function validate_inputs

plugins/ngs-analysis/scripts/run_dna_germline_variants.py:58–136  ·  view source on GitHub ↗
(args: argparse.Namespace)

Source from the content-addressed store, hash-verified

56
57
58def validate_inputs(args: argparse.Namespace) -> tuple[dict[str, Any], list[dict[str, str]]]:
59 sample_sheet = args.sample_sheet.expanduser().resolve()
60 reference = args.reference_fasta.expanduser().resolve()
61 target_bed = args.target_bed.expanduser().resolve() if args.target_bed else None
62 rows, columns = read_samples(sample_sheet)
63 errors: list[str] = []
64 warnings: list[str] = []
65 normalized: list[dict[str, str]] = []
66
67 if not reference.exists():
68 errors.append(f"reference FASTA does not exist: {reference}")
69 if not (Path(str(reference) + ".fai")).exists():
70 warnings.append(
71 f"reference FASTA index is missing and may be created by samtools faidx: {reference}.fai"
72 )
73 if not (reference.with_suffix(".dict")).exists():
74 warnings.append(
75 f"reference sequence dictionary is missing and may be created by GATK: {reference.with_suffix('.dict')}"
76 )
77 if target_bed and not target_bed.exists():
78 errors.append(f"target BED does not exist: {target_bed}")
79 if args.bqsr_mode == "force" and not args.known_sites:
80 errors.append("BQSR was forced but no --known-sites VCFs were provided")
81
82 known_sites: list[str] = []
83 for item in args.known_sites:
84 resource = item.expanduser().resolve()
85 known_sites.append(str(resource))
86 if not resource.exists():
87 errors.append(f"known-sites VCF does not exist: {resource}")
88 if (
89 not (Path(str(resource) + ".tbi")).exists()
90 and not (Path(str(resource) + ".csi")).exists()
91 ):
92 warnings.append(
93 f"known-sites VCF index is missing and may be required by GATK: {resource}.tbi"
94 )
95
96 for row_index, row in enumerate(rows, start=2):
97 sample = row.get("sample") or row.get("sample_id") or f"row_{row_index}"
98 bam_raw = row.get("bam") or row.get("cram") or ""
99 if not bam_raw:
100 errors.append(f"row {row_index}: bam or cram column is required")
101 continue
102 bam = Path(bam_raw).expanduser()
103 if not bam.is_absolute():
104 bam = sample_sheet.parent / bam
105 bam = bam.resolve()
106 if not bam.exists():
107 errors.append(f"row {row_index}: alignment file does not exist: {bam}")
108 if bam.suffix == ".bam" and not (Path(str(bam) + ".bai")).exists():
109 warnings.append(
110 f"row {row_index}: BAM index is missing and may be created by samtools index: {bam}.bai"
111 )
112 normalized.append({"sample": sample, "alignment": str(bam), "row_index": str(row_index)})
113
114 if not normalized:
115 errors.append("no usable alignment rows found")

Callers 1

main · Function · 0.70

Calls 4

bqsr_enabled · Function · 0.85
use_gvcf · Function · 0.85
read_samples · Function · 0.70
get · Method · 0.45

Tested by

no test coverage detected