hub / github.com/openai/plugins / normalize_samples

Function normalize_samples

plugins/ngs-analysis/scripts/run_fastq_assay_package.py:333–437 · view source on GitHub ↗

(
    args: argparse.Namespace,
)

Source from the content-addressed store, hash-verified

331
332
333	def normalize_samples(
334	args: argparse.Namespace,
335	) -> tuple[dict[str, Any], list[dict[str, str]], list[Path]]:
336	sample_sheet = args.sample_sheet.expanduser().resolve()
337	rows, columns = read_table(sample_sheet)
338	roots = [root.expanduser().resolve() for root in args.fastq_root]
339	roots.extend([sample_sheet.parent, Path.cwd()])
340	normalized: list[dict[str, str]] = []
341	fastq_paths: list[Path] = []
342	errors: list[str] = []
343	warnings: list[str] = []
344	fastq_checks = []
345
346	for row_index, row in enumerate(rows, start=2):
347	sample = (
348	first_present(row, ["sample", "sample_id", "sampleID", "run_accession"])
349	or f"row_{row_index}"
350	)
351	r1_raw = first_present(row, ["fastq_1", "forwardReads", "r1", "read1"])
352	r2_raw = first_present(row, ["fastq_2", "reverseReads", "r2", "read2"])
353	fasta_raw = first_present(row, ["fasta"])
354	if not r1_raw and not fasta_raw:
355	errors.append(f"row {row_index}: fastq_1/forwardReads or fasta is required")
356	continue
357	r1 = resolve_existing_path(r1_raw, sample_sheet.parent, roots) if r1_raw else None
358	r2 = resolve_existing_path(r2_raw, sample_sheet.parent, roots) if r2_raw else None
359	fasta = resolve_existing_path(fasta_raw, sample_sheet.parent, roots) if fasta_raw else None
360	if r1_raw and not r1:
361	errors.append(f"row {row_index}: could not resolve read 1 path {r1_raw}")
362	if r2_raw and not r2:
363	errors.append(f"row {row_index}: could not resolve read 2 path {r2_raw}")
364	if fasta_raw and not fasta:
365	errors.append(f"row {row_index}: could not resolve fasta path {fasta_raw}")
366	for read_label, read_path in [("r1", r1), ("r2", r2)]:
367	if read_path is None:
368	continue
369	fastq_paths.append(read_path)
370	check = check_fastq(read_path, args.fastq_record_check)
371	check["sample"] = sample
372	check["read"] = read_label
373	fastq_checks.append(check)
374	if check["errors"]:
375	errors.extend(f"{sample} {read_label}: {error}" for error in check["errors"])
376	normalized.append(
377	{
378	"sample": sample,
379	"row_index": str(row_index),
380	"fastq_1": str(r1) if r1 else "",
381	"fastq_2": str(r2) if r2 else "",
382	"fasta": str(fasta) if fasta else "",
383	"layout": "PE" if r2 else ("SE" if r1 else "FASTA"),
384	"marker": first_present(row, ["marker", "target", "region"]),
385	"assay": first_present(row, ["assay", "library_strategy"]) or args.lane,
386	"instrument_platform": first_present(row, ["instrument_platform", "platform"]),
387	"host_organism": first_present(
388	row, ["host_organism", "host", "host_species", "organism"]
389	),
390	"genome_build": first_present(

Callers 1

main · Function · 0.85

Calls 7

metadata_warnings · Function · 0.85

summarize_input_context · Function · 0.85

extend · Method · 0.80

read_table · Function · 0.70

first_present · Function · 0.70

resolve_existing_path · Function · 0.70

check_fastq · Function · 0.70

Tested by

no test coverage detected