Write a per-run evaluation record when ``AGENT_TRAJECTORY_DIR`` is set. Reads ``run_id`` / ``scenario_id`` from the same contextvars used by :func:`agent_run_span`, so CLI-level wiring doesn't have to touch the runner's public signature. Returns the output path, or ``None`` when persistence is disabled.
(
*,
runner_name: str,
model: str,
question: str,
answer: str,
trajectory: Any,
)
| 37 | |
| 38 | |
def persist_trajectory(
    *,
    runner_name: str,
    model: str,
    question: str,
    answer: str,
    trajectory: Any,
) -> Path | None:
    """Write a per-run evaluation record when ``AGENT_TRAJECTORY_DIR`` is set.

    Reads ``run_id`` / ``scenario_id`` from the same contextvars used by
    :func:`agent_run_span`, so CLI-level wiring doesn't have to touch the
    runner's public signature.

    The record is written as ``<dir>/<run_id>.json`` (UTF-8, 2-space indent)
    with the keys ``run_id``, ``scenario_id``, ``runner``, ``model``,
    ``question``, ``answer`` and ``trajectory``. Values that ``json`` cannot
    encode natively are stringified via ``default=str``.

    Persistence is best-effort with respect to the filesystem: a failure to
    create the output directory or to write the file is logged and reported
    as ``None`` instead of propagating to the caller.

    Args:
        runner_name: Stored under the ``"runner"`` key of the record.
        model: Stored under the ``"model"`` key.
        question: Stored under the ``"question"`` key.
        answer: Stored under the ``"answer"`` key.
        trajectory: Passed through ``_serialize_trajectory`` and stored under
            the ``"trajectory"`` key.

    Returns:
        The path of the written file, or ``None`` when persistence is
        disabled (env var unset or empty), when no ``run_id`` is present in
        the current context, or when an ``OSError`` occurs while creating the
        directory or writing the file.
    """
    dir_env = os.environ.get(_TRAJECTORY_DIR_ENV)
    if not dir_env:
        return None

    run_id = _run_id_var.get()
    if not run_id:
        _log.warning(
            "%s is set but no run_id in context; skipping trajectory persist",
            _TRAJECTORY_DIR_ENV,
        )
        return None

    out_dir = Path(dir_env)
    out_path = out_dir / f"{run_id}.json"

    record = {
        "run_id": run_id,
        "scenario_id": _scenario_id_var.get(),
        "runner": runner_name,
        "model": model,
        "question": question,
        "answer": answer,
        "trajectory": _serialize_trajectory(trajectory),
    }
    # Serialize before touching the filesystem so the guarded region below
    # contains only the I/O calls that can raise OSError.
    payload = json.dumps(record, indent=2, default=str)

    try:
        # mkdir is inside the guard: an uncreatable directory (permissions,
        # read-only filesystem, a file in the way) must be handled the same
        # way as a failed write rather than crashing the run.
        out_dir.mkdir(parents=True, exist_ok=True)
        out_path.write_text(payload, encoding="utf-8")
    except OSError:
        _log.exception("persist_trajectory: write failed at %s", out_path)
        return None
    return out_path
| 87 | |
| 88 | |
| 89 | def _serialize_trajectory(trajectory: Any) -> Any: |