MCPcopy
hub / github.com/IBM/AssetOpsBench / evaluate

Method evaluate

src/evaluation/evaluator.py:41–54  ·  view source on GitHub ↗
(
        self,
        *,
        trajectories_path: Path,
        scenarios_paths: list[Path],
    )

Source from the content-addressed store, hash-verified

39 self.judge_model = judge_model
40
def evaluate(
    self,
    *,
    trajectories_path: Path,
    scenarios_paths: list[Path],
) -> EvalReport:
    """Score persisted trajectories against scenarios and build a report.

    Scenarios are loaded from ``scenarios_paths`` and trajectories from
    ``trajectories_path``. ``join_records`` pairs them up, every pair is
    scored with ``_score_one``, and the collected scores are passed to
    ``build_report``, whose result is returned.

    Both arguments are keyword-only.
    """
    # Keep the load order (scenarios first) so a failure in either loader
    # surfaces in the same order as before.
    scenario_set = load_scenarios(scenarios_paths)
    trajectory_set = load_trajectories(trajectories_path)

    scored: list[ScenarioResult] = [
        self._score_one(scenario, traj)
        for scenario, traj in join_records(scenario_set, trajectory_set)
    ]
    return build_report(scored)
55
56 def _score_one(
57 self, scenario: Scenario, traj: PersistedTrajectory

Calls 5

_score_one · Method · 0.95
load_scenarios · Function · 0.85
load_trajectories · Function · 0.85
join_records · Function · 0.85
build_report · Function · 0.85