(args: argparse.Namespace)
| 56 | |
| 57 | |
| 58 | def validate_inputs(args: argparse.Namespace) -> tuple[dict[str, Any], list[dict[str, str]]]: |
| 59 | sample_sheet = args.sample_sheet.expanduser().resolve() |
| 60 | reference = args.reference_fasta.expanduser().resolve() |
| 61 | target_bed = args.target_bed.expanduser().resolve() if args.target_bed else None |
| 62 | rows, columns = read_samples(sample_sheet) |
| 63 | errors: list[str] = [] |
| 64 | warnings: list[str] = [] |
| 65 | normalized: list[dict[str, str]] = [] |
| 66 | |
| 67 | if not reference.exists(): |
| 68 | errors.append(f"reference FASTA does not exist: {reference}") |
| 69 | if not (Path(str(reference) + ".fai")).exists(): |
| 70 | warnings.append( |
| 71 | f"reference FASTA index is missing and may be created by samtools faidx: {reference}.fai" |
| 72 | ) |
| 73 | if not (reference.with_suffix(".dict")).exists(): |
| 74 | warnings.append( |
| 75 | f"reference sequence dictionary is missing and may be created by GATK: {reference.with_suffix('.dict')}" |
| 76 | ) |
| 77 | if target_bed and not target_bed.exists(): |
| 78 | errors.append(f"target BED does not exist: {target_bed}") |
| 79 | if args.bqsr_mode == "force" and not args.known_sites: |
| 80 | errors.append("BQSR was forced but no --known-sites VCFs were provided") |
| 81 | |
| 82 | known_sites: list[str] = [] |
| 83 | for item in args.known_sites: |
| 84 | resource = item.expanduser().resolve() |
| 85 | known_sites.append(str(resource)) |
| 86 | if not resource.exists(): |
| 87 | errors.append(f"known-sites VCF does not exist: {resource}") |
| 88 | if ( |
| 89 | not (Path(str(resource) + ".tbi")).exists() |
| 90 | and not (Path(str(resource) + ".csi")).exists() |
| 91 | ): |
| 92 | warnings.append( |
| 93 | f"known-sites VCF index is missing and may be required by GATK: {resource}.tbi" |
| 94 | ) |
| 95 | |
| 96 | for row_index, row in enumerate(rows, start=2): |
| 97 | sample = row.get("sample") or row.get("sample_id") or f"row_{row_index}" |
| 98 | bam_raw = row.get("bam") or row.get("cram") or "" |
| 99 | if not bam_raw: |
| 100 | errors.append(f"row {row_index}: bam or cram column is required") |
| 101 | continue |
| 102 | bam = Path(bam_raw).expanduser() |
| 103 | if not bam.is_absolute(): |
| 104 | bam = sample_sheet.parent / bam |
| 105 | bam = bam.resolve() |
| 106 | if not bam.exists(): |
| 107 | errors.append(f"row {row_index}: alignment file does not exist: {bam}") |
| 108 | if bam.suffix == ".bam" and not (Path(str(bam) + ".bai")).exists(): |
| 109 | warnings.append( |
| 110 | f"row {row_index}: BAM index is missing and may be created by samtools index: {bam}.bai" |
| 111 | ) |
| 112 | normalized.append({"sample": sample, "alignment": str(bam), "row_index": str(row_index)}) |
| 113 | |
| 114 | if not normalized: |
| 115 | errors.append("no usable alignment rows found") |
no test coverage detected