(run_dir: Path, args: argparse.Namespace, rows: list[dict[str, str]])
| 234 | |
| 235 | |
| 236 | def execute(run_dir: Path, args: argparse.Namespace, rows: list[dict[str, str]]) -> dict[str, Any]: |
| 237 | reference = args.reference_fasta.expanduser().resolve() |
| 238 | results: dict[str, Any] = {"ok": True, "steps": []} |
| 239 | (run_dir / "qc").mkdir(parents=True, exist_ok=True) |
| 240 | (run_dir / "recal").mkdir(parents=True, exist_ok=True) |
| 241 | (run_dir / "gvcf").mkdir(parents=True, exist_ok=True) |
| 242 | (run_dir / "variants").mkdir(parents=True, exist_ok=True) |
| 243 | (run_dir / "joint").mkdir(parents=True, exist_ok=True) |
| 244 | |
| 245 | if not (Path(str(reference) + ".fai")).exists(): |
| 246 | faidx = run_cmd(["samtools", "faidx", str(reference)], run_dir, timeout=600) |
| 247 | write_json(run_dir / "logs" / "samtools_faidx.json", faidx) |
| 248 | results["steps"].append({"name": "samtools_faidx", "ok": faidx.get("ok")}) |
| 249 | results["ok"] = bool(results["ok"] and faidx.get("ok")) |
| 250 | |
| 251 | reference_dict = reference.with_suffix(".dict") |
| 252 | if not reference_dict.exists(): |
| 253 | create_dict = run_cmd( |
| 254 | ["gatk", "CreateSequenceDictionary", "-R", str(reference), "-O", str(reference_dict)], |
| 255 | run_dir, |
| 256 | timeout=600, |
| 257 | ) |
| 258 | write_json(run_dir / "logs" / "gatk_create_dict.json", create_dict) |
| 259 | results["steps"].append({"name": "gatk_create_dict", "ok": create_dict.get("ok")}) |
| 260 | results["ok"] = bool(results["ok"] and create_dict.get("ok")) |
| 261 | |
| 262 | gvcfs: list[Path] = [] |
| 263 | for row in rows: |
| 264 | sample = row["sample"] |
| 265 | input_bam = Path(row["alignment"]) |
| 266 | if bqsr_enabled(args): |
| 267 | known_sites_args = [] |
| 268 | for resource in args.known_sites: |
| 269 | known_sites_args.extend(["--known-sites", str(resource.expanduser().resolve())]) |
| 270 | recal_table = run_dir / "recal" / f"{sample}.recal.table" |
| 271 | recal = run_cmd( |
| 272 | [ |
| 273 | "gatk", |
| 274 | "BaseRecalibrator", |
| 275 | "-R", |
| 276 | str(reference), |
| 277 | "-I", |
| 278 | str(input_bam), |
| 279 | *known_sites_args, |
| 280 | "-O", |
| 281 | str(recal_table), |
| 282 | ], |
| 283 | run_dir, |
| 284 | timeout=3600, |
| 285 | ) |
| 286 | write_json(run_dir / "logs" / f"{sample}.base_recalibrator.json", recal) |
| 287 | apply_bqsr = run_cmd( |
| 288 | [ |
| 289 | "gatk", |
| 290 | "ApplyBQSR", |
| 291 | "-R", |
| 292 | str(reference), |
| 293 | "-I", |
no test coverage detected