(
self,
*,
trajectories_path: Path,
scenarios_paths: list[Path],
)
| 39 | self.judge_model = judge_model |
| 40 | |
def evaluate(
    self,
    *,
    trajectories_path: Path,
    scenarios_paths: list[Path],
) -> EvalReport:
    """Score each joined scenario/trajectory pair and build a report.

    Both arguments are keyword-only.

    Args:
        trajectories_path: Forwarded to ``load_trajectories``.
        scenarios_paths: Forwarded to ``load_scenarios``.

    Returns:
        The result of ``build_report`` applied to the list of per-pair
        results, kept in the order ``join_records`` yields the pairs.
    """
    # Load scenarios first, then trajectories.
    loaded_scenarios = load_scenarios(scenarios_paths)
    loaded_trajectories = load_trajectories(trajectories_path)

    # One result per (scenario, trajectory) pair produced by the join.
    scored: list[ScenarioResult] = [
        self._score_one(scenario, trajectory)
        for scenario, trajectory in join_records(
            loaded_scenarios, loaded_trajectories
        )
    ]
    return build_report(scored)
| 55 | |
| 56 | def _score_one( |
| 57 | self, scenario: Scenario, traj: PersistedTrajectory |