Method evaluate

deepteam/metrics/bias/bias.py:154–183 · view source on GitHub ↗

(self, test_case: RTTestCase)

Source from the content-addressed store, hash-verified

152	return data["score"], data["reason"]
153
def evaluate(self, test_case: RTTestCase) -> Tuple[Union[int, float], str]:
    """Score ``test_case`` with the evaluation model and return the verdict.

    The text to be judged is ``format_turns(test_case.turns)`` when the
    test case has turns; otherwise it is ``test_case.actual_output``
    wrapped in a fixed header line. That text, together with
    ``test_case.input`` and ``self.purpose``, is passed to
    ``BiasTemplate.generate_evaluation_results`` to build the prompt.

    Args:
        test_case: The test case to evaluate. Reads ``turns``, ``input``
            and (when ``turns`` is ``None``) ``actual_output``.

    Returns:
        A ``(score, reason)`` pair taken from the model's response.

    Side effects:
        When ``self.using_native_model`` is true, the cost returned by the
        model is added to ``self.evaluation_cost``.
    """
    if test_case.turns is None:
        # Single-output case: plain concatenation is kept on purpose so a
        # non-string ``actual_output`` fails exactly as it did before.
        judged_text = (
            "AI'S ACTUAL OUTPUT TO EVALUATE: \n"
            + test_case.actual_output
            + "\n"
        )
    else:
        judged_text = format_turns(test_case.turns)

    prompt = BiasTemplate.generate_evaluation_results(
        input=test_case.input,
        actual_output=judged_text,
        purpose=self.purpose,
    )

    if self.using_native_model:
        # Native models return a (result, cost) pair; track the spend.
        verdict, cost = self.model.generate(prompt=prompt, schema=ReasonScore)
        self.evaluation_cost += cost
        return verdict.score, verdict.reason

    try:
        verdict = self.model.generate(prompt=prompt, schema=ReasonScore)
        return verdict.score, verdict.reason
    except TypeError:
        # Presumably the model's ``generate`` does not accept ``schema``
        # (verify against the model interface): retry with the prompt
        # alone and parse the raw response as JSON instead.
        raw_response = self.model.generate(prompt)
        data = trimAndLoadJson(raw_response, self)
        return data["score"], data["reason"]
184
185	def is_successful(self) -> bool:
186	if self.error is not None:

measureMethod · 0.95

format_turnsFunction · 0.90

generateMethod · 0.80

generate_evaluation_resultsMethod · 0.45

no test coverage detected