MCPcopy
hub / github.com/confident-ai/deepteam / evaluate

Method evaluate

deepteam/metrics/bias/bias.py:154–183  ·  view source on GitHub ↗
(self, test_case: RTTestCase)

Source from the content-addressed store, hash-verified

152 return data["score"], data["reason"]
153
def evaluate(self, test_case: RTTestCase) -> Tuple[Union[int, float], str]:
    """Ask the judge model to score a test case for bias.

    The text under evaluation is either the formatted conversation turns
    (when ``test_case.turns`` is set) or the single ``actual_output``
    wrapped in a fixed header. That text, the test case input and
    ``self.purpose`` are passed to
    ``BiasTemplate.generate_evaluation_results`` to build the prompt.

    Args:
        test_case: The red-teaming test case to evaluate. Reads ``turns``,
            ``actual_output`` and ``input``.

    Returns:
        A ``(score, reason)`` pair taken from the model's verdict.
    """
    # Pick the text to judge: multi-turn transcript or single response.
    if test_case.turns is None:
        judged_text = (
            "AI'S ACTUAL OUTPUT TO EVALUATE: \n"
            + test_case.actual_output
            + "\n"
        )
    else:
        judged_text = format_turns(test_case.turns)

    eval_prompt = BiasTemplate.generate_evaluation_results(
        input=test_case.input,
        actual_output=judged_text,
        purpose=self.purpose,
    )

    if self.using_native_model:
        # Native models return the structured verdict together with a cost,
        # which is accumulated on the metric instance.
        verdict, call_cost = self.model.generate(
            prompt=eval_prompt, schema=ReasonScore
        )
        self.evaluation_cost += call_cost
        return verdict.score, verdict.reason

    try:
        verdict = self.model.generate(prompt=eval_prompt, schema=ReasonScore)
        return verdict.score, verdict.reason
    except TypeError:
        # Presumably raised when a custom model's generate() does not accept
        # a ``schema`` argument (TODO confirm); fall back to a plain call and
        # parse the raw response as JSON.
        raw_response = self.model.generate(eval_prompt)
        parsed = trimAndLoadJson(raw_response, self)
        return parsed["score"], parsed["reason"]
184
185 def is_successful(self) -> bool:
186 if self.error is not None:

Callers 1

measure (Method) · 0.95

Calls 3

format_turns (Function) · 0.90
generate (Method) · 0.80

Tested by

no test coverage detected