(
args: argparse.Namespace,
)
| 331 | |
| 332 | |
| 333 | def normalize_samples( |
| 334 | args: argparse.Namespace, |
| 335 | ) -> tuple[dict[str, Any], list[dict[str, str]], list[Path]]: |
| 336 | sample_sheet = args.sample_sheet.expanduser().resolve() |
| 337 | rows, columns = read_table(sample_sheet) |
| 338 | roots = [root.expanduser().resolve() for root in args.fastq_root] |
| 339 | roots.extend([sample_sheet.parent, Path.cwd()]) |
| 340 | normalized: list[dict[str, str]] = [] |
| 341 | fastq_paths: list[Path] = [] |
| 342 | errors: list[str] = [] |
| 343 | warnings: list[str] = [] |
| 344 | fastq_checks = [] |
| 345 | |
| 346 | for row_index, row in enumerate(rows, start=2): |
| 347 | sample = ( |
| 348 | first_present(row, ["sample", "sample_id", "sampleID", "run_accession"]) |
| 349 | or f"row_{row_index}" |
| 350 | ) |
| 351 | r1_raw = first_present(row, ["fastq_1", "forwardReads", "r1", "read1"]) |
| 352 | r2_raw = first_present(row, ["fastq_2", "reverseReads", "r2", "read2"]) |
| 353 | fasta_raw = first_present(row, ["fasta"]) |
| 354 | if not r1_raw and not fasta_raw: |
| 355 | errors.append(f"row {row_index}: fastq_1/forwardReads or fasta is required") |
| 356 | continue |
| 357 | r1 = resolve_existing_path(r1_raw, sample_sheet.parent, roots) if r1_raw else None |
| 358 | r2 = resolve_existing_path(r2_raw, sample_sheet.parent, roots) if r2_raw else None |
| 359 | fasta = resolve_existing_path(fasta_raw, sample_sheet.parent, roots) if fasta_raw else None |
| 360 | if r1_raw and not r1: |
| 361 | errors.append(f"row {row_index}: could not resolve read 1 path {r1_raw}") |
| 362 | if r2_raw and not r2: |
| 363 | errors.append(f"row {row_index}: could not resolve read 2 path {r2_raw}") |
| 364 | if fasta_raw and not fasta: |
| 365 | errors.append(f"row {row_index}: could not resolve fasta path {fasta_raw}") |
| 366 | for read_label, read_path in [("r1", r1), ("r2", r2)]: |
| 367 | if read_path is None: |
| 368 | continue |
| 369 | fastq_paths.append(read_path) |
| 370 | check = check_fastq(read_path, args.fastq_record_check) |
| 371 | check["sample"] = sample |
| 372 | check["read"] = read_label |
| 373 | fastq_checks.append(check) |
| 374 | if check["errors"]: |
| 375 | errors.extend(f"{sample} {read_label}: {error}" for error in check["errors"]) |
| 376 | normalized.append( |
| 377 | { |
| 378 | "sample": sample, |
| 379 | "row_index": str(row_index), |
| 380 | "fastq_1": str(r1) if r1 else "", |
| 381 | "fastq_2": str(r2) if r2 else "", |
| 382 | "fasta": str(fasta) if fasta else "", |
| 383 | "layout": "PE" if r2 else ("SE" if r1 else "FASTA"), |
| 384 | "marker": first_present(row, ["marker", "target", "region"]), |
| 385 | "assay": first_present(row, ["assay", "library_strategy"]) or args.lane, |
| 386 | "instrument_platform": first_present(row, ["instrument_platform", "platform"]), |
| 387 | "host_organism": first_present( |
| 388 | row, ["host_organism", "host", "host_species", "organism"] |
| 389 | ), |
| 390 | "genome_build": first_present( |
no test coverage detected