(args: argparse.Namespace)
| 167 | |
| 168 | |
| 169 | def validate_inputs(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]: |
| 170 | count_matrix = args.count_matrix.expanduser().resolve() |
| 171 | sample_metadata = args.sample_metadata.expanduser().resolve() |
| 172 | contrasts = args.contrasts.expanduser().resolve() |
| 173 | errors = [] |
| 174 | warnings = [] |
| 175 | for label, path in [ |
| 176 | ("count_matrix", count_matrix), |
| 177 | ("sample_metadata", sample_metadata), |
| 178 | ("contrasts", contrasts), |
| 179 | ]: |
| 180 | if not path.exists(): |
| 181 | errors.append(f"{label} does not exist: {path}") |
| 182 | if errors: |
| 183 | return {"ok": False, "errors": errors, "warnings": warnings}, {} |
| 184 | |
| 185 | sample_cols, _, matrix_status = parse_count_matrix(count_matrix) |
| 186 | errors.extend(matrix_status["errors"]) |
| 187 | metadata_rows = read_tsv(sample_metadata) |
| 188 | contrast_rows = read_tsv(contrasts) |
| 189 | |
| 190 | metadata_samples = [row.get("sample_id", "") for row in metadata_rows] |
| 191 | if len(metadata_samples) != len(set(metadata_samples)): |
| 192 | errors.append("sample metadata contains duplicate sample_id values") |
| 193 | if set(sample_cols) != set(metadata_samples): |
| 194 | errors.append("count matrix sample columns and metadata sample_id values do not match") |
| 195 | if "condition" not in (metadata_rows[0].keys() if metadata_rows else []): |
| 196 | errors.append("sample metadata must include a condition column") |
| 197 | |
| 198 | condition_counts: dict[str, int] = {} |
| 199 | for row in metadata_rows: |
| 200 | condition = row.get("condition", "") |
| 201 | if not condition: |
| 202 | errors.append(f"sample {row.get('sample_id', '<missing>')} has no condition") |
| 203 | condition_counts[condition] = condition_counts.get(condition, 0) + 1 |
| 204 | |
| 205 | contrast_status = [] |
| 206 | required_contrast_cols = {"contrast", "numerator_condition", "denominator_condition"} |
| 207 | if contrast_rows and not required_contrast_cols.issubset(contrast_rows[0].keys()): |
| 208 | errors.append( |
| 209 | "contrasts file must include contrast, numerator_condition, and denominator_condition columns" |
| 210 | ) |
| 211 | for row in contrast_rows: |
| 212 | numerator = row.get("numerator_condition", "") |
| 213 | denominator = row.get("denominator_condition", "") |
| 214 | numerator_n = condition_counts.get(numerator, 0) |
| 215 | denominator_n = condition_counts.get(denominator, 0) |
| 216 | status = "valid" if numerator_n >= 2 and denominator_n >= 2 else "insufficient_replicates" |
| 217 | if status == "valid" and numerator_n == 2 and denominator_n == 2: |
| 218 | warnings.append( |
| 219 | f"Contrast {row.get('contrast', '')} is minimally powered (2 vs 2 replicates); treat p-values and effect sizes as exploratory and review QC plots carefully." |
| 220 | ) |
| 221 | contrast_status.append( |
| 222 | { |
| 223 | "contrast": row.get("contrast", ""), |
| 224 | "numerator_condition": numerator, |
| 225 | "denominator_condition": denominator, |
| 226 | "numerator_replicates": numerator_n, |
no test coverage detected