MCPcopy
hub / github.com/openai/plugins / validate_inputs

Function validate_inputs

plugins/ngs-analysis/scripts/run_bulk_rnaseq_de.py:169–269  ·  view source on GitHub ↗
(args: argparse.Namespace)

Source from the content-addressed store, hash-verified

167
168
169def validate_inputs(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]:
170 count_matrix = args.count_matrix.expanduser().resolve()
171 sample_metadata = args.sample_metadata.expanduser().resolve()
172 contrasts = args.contrasts.expanduser().resolve()
173 errors = []
174 warnings = []
175 for label, path in [
176 ("count_matrix", count_matrix),
177 ("sample_metadata", sample_metadata),
178 ("contrasts", contrasts),
179 ]:
180 if not path.exists():
181 errors.append(f"{label} does not exist: {path}")
182 if errors:
183 return {"ok": False, "errors": errors, "warnings": warnings}, {}
184
185 sample_cols, _, matrix_status = parse_count_matrix(count_matrix)
186 errors.extend(matrix_status["errors"])
187 metadata_rows = read_tsv(sample_metadata)
188 contrast_rows = read_tsv(contrasts)
189
190 metadata_samples = [row.get("sample_id", "") for row in metadata_rows]
191 if len(metadata_samples) != len(set(metadata_samples)):
192 errors.append("sample metadata contains duplicate sample_id values")
193 if set(sample_cols) != set(metadata_samples):
194 errors.append("count matrix sample columns and metadata sample_id values do not match")
195 if "condition" not in (metadata_rows[0].keys() if metadata_rows else []):
196 errors.append("sample metadata must include a condition column")
197
198 condition_counts: dict[str, int] = {}
199 for row in metadata_rows:
200 condition = row.get("condition", "")
201 if not condition:
202 errors.append(f"sample {row.get('sample_id', '<missing>')} has no condition")
203 condition_counts[condition] = condition_counts.get(condition, 0) + 1
204
205 contrast_status = []
206 required_contrast_cols = {"contrast", "numerator_condition", "denominator_condition"}
207 if contrast_rows and not required_contrast_cols.issubset(contrast_rows[0].keys()):
208 errors.append(
209 "contrasts file must include contrast, numerator_condition, and denominator_condition columns"
210 )
211 for row in contrast_rows:
212 numerator = row.get("numerator_condition", "")
213 denominator = row.get("denominator_condition", "")
214 numerator_n = condition_counts.get(numerator, 0)
215 denominator_n = condition_counts.get(denominator, 0)
216 status = "valid" if numerator_n >= 2 and denominator_n >= 2 else "insufficient_replicates"
217 if status == "valid" and numerator_n == 2 and denominator_n == 2:
218 warnings.append(
219 f"Contrast {row.get('contrast', '')} is minimally powered (2 vs 2 replicates); treat p-values and effect sizes as exploratory and review QC plots carefully."
220 )
221 contrast_status.append(
222 {
223 "contrast": row.get("contrast", ""),
224 "numerator_condition": numerator,
225 "denominator_condition": denominator,
226 "numerator_replicates": numerator_n,

Callers 1

main (Function) · 0.70

Calls 8

parse_count_matrix (Function) · 0.85
read_tsv (Function) · 0.85
r_package_available (Function) · 0.85
infer_input_mode (Function) · 0.85
select_method (Function) · 0.85
build_fit_formula (Function) · 0.85
extend (Method) · 0.80
get (Method) · 0.45

Tested by

no test coverage detected