(path: Path)
| 58 | |
| 59 | |
def parse_count_matrix(path: Path) -> tuple[list[str], list[dict[str, str]], dict[str, Any]]:
    """Read a tab-separated count matrix and summarize its expression values.

    Every column other than ``gene_id`` and ``gene_name`` is treated as a
    sample column. Each sample cell is parsed as a float; cells that cannot
    be parsed, are missing, or are non-finite are reported in the summary's
    ``errors`` list rather than raising.

    Args:
        path: Location of the tab-delimited file, read as UTF-8.

    Returns:
        A ``(sample_cols, rows, summary)`` tuple where ``sample_cols`` is the
        list of sample column names in header order, ``rows`` is the list of
        row dicts produced by ``csv.DictReader``, and ``summary`` has keys:

        * ``errors``: one message per unusable cell.
        * ``integer_like``: ``False`` if any finite value is further than
          1e-8 from a whole number.
        * ``finite_values``: ``False`` if any cell parsed to inf/nan.
        * ``gene_count``: number of data rows.
        * ``min_value`` / ``max_value``: extremes over the finite values, or
          ``None`` when no finite value was seen.

    Raises:
        ValueError: If the header has no ``gene_id`` column (this includes an
            empty file, which has no header at all).
    """
    with path.open(newline="", encoding="utf-8") as handle:
        reader = csv.DictReader(handle, delimiter="\t")
        columns = reader.fieldnames or []
        if "gene_id" not in columns:
            raise ValueError("count matrix must include a gene_id column")
        # Materialize while the file is still open; everything below works
        # on the in-memory rows.
        rows = list(reader)

    sample_cols = [column for column in columns if column not in {"gene_id", "gene_name"}]
    errors: list[str] = []
    integer_like = True
    finite_values = True
    min_value = math.inf
    max_value = -math.inf

    # Rows are numbered from 2 so that the first data row follows the header.
    for row_index, row in enumerate(rows, start=2):
        for sample in sample_cols:
            try:
                value = float(row[sample])
            except (TypeError, ValueError):
                # TypeError covers short rows: DictReader fills the missing
                # trailing cells with None, which float() rejects.
                errors.append(f"row {row_index} sample {sample}: non-numeric expression value")
                continue
            if not math.isfinite(value):
                finite_values = False
                errors.append(f"row {row_index} sample {sample}: non-finite expression value")
                # round() raises on inf/nan, and nan would poison min/max,
                # so non-finite cells are excluded from the remaining checks.
                continue
            if abs(value - round(value)) > 1e-8:
                integer_like = False
            min_value = min(min_value, value)
            max_value = max(max_value, value)

    return (
        sample_cols,
        rows,
        {
            "errors": errors,
            "integer_like": integer_like,
            "finite_values": finite_values,
            "gene_count": len(rows),
            "min_value": None if min_value == math.inf else min_value,
            "max_value": None if max_value == -math.inf else max_value,
        },
    )
| 99 | |
| 100 | |
| 101 | def infer_input_mode( |
no test coverage detected