Function compare_answers

scripts/eval_imobench_answer.py:91–120 · view source on GitHub ↗

Compare the predicted answer with the ground truth. Uses both exact match and semantic equivalence.

(predicted: str, ground_truth: str)

Source from the content-addressed store, hash-verified

89
90
def _contains_outside_number(needle: str, haystack: str) -> bool:
    """Return True if *needle* occurs in *haystack* without splitting a number.

    A plain ``needle in haystack`` test accepts "1" inside "10" or "5" inside
    "0.5", and accepts the empty string inside anything. This helper rejects
    those cases while still accepting "4" inside "c = 4".
    """
    if not needle:
        # "" is a substring of every string; it must never count as a match.
        return False

    ascii_digits = "0123456789"
    pattern = re.escape(needle)
    if needle[0] in ascii_digits:
        # The match must not continue a number on its left ("10" vs "0", "0.5" vs "5").
        pattern = r'(?<![0-9.])' + pattern
    if needle[-1] in ascii_digits:
        # The match must not continue a number on its right ("10" vs "1", "1.5" vs "1").
        pattern += r'(?![0-9]|\.[0-9])'
    return re.search(pattern, haystack) is not None


def compare_answers(predicted: str, ground_truth: str) -> bool:
    """
    Compare predicted answer with ground truth.

    The checks are tried in order and the first success wins:

    1. Exact equality of the two answers after ``normalize_answer``.
    2. Containment of one normalized answer in the other (for cases like
       "4" in "c = 4"). An empty normalized answer never matches this way,
       and a number is not matched inside a longer number ("1" vs "10").
    3. Numeric equality within 1e-6, after stripping every character except
       digits, ".", and "-" from the raw (un-normalized) answers.

    Args:
        predicted: The answer produced by the model.
        ground_truth: The reference answer.

    Returns:
        True if any check succeeds. False otherwise, including when either
        argument is empty or None.

    NOTE(review): assumes ``normalize_answer`` returns a ``str`` -- confirm
    against its definition.
    """
    if not predicted or not ground_truth:
        return False

    # Normalize both answers
    pred_norm = normalize_answer(predicted)
    truth_norm = normalize_answer(ground_truth)

    # Exact match after normalization
    if pred_norm == truth_norm:
        return True

    # Check if one contains the other (for cases like "4" in "c = 4"),
    # without letting "" or a fragment of a longer number count as a match.
    if (_contains_outside_number(pred_norm, truth_norm)
            or _contains_outside_number(truth_norm, pred_norm)):
        return True

    # Try numeric comparison if possible
    try:
        pred_num = float(re.sub(r'[^0-9.-]', '', predicted))
        truth_num = float(re.sub(r'[^0-9.-]', '', ground_truth))
    except (ValueError, TypeError):
        # No parseable number in at least one answer; nothing left to try.
        return False

    return abs(pred_num - truth_num) < 1e-6
121
122
123	def extract_answer_from_solution(solution: str, problem_id: str = None) -> str:

mainFunction · 0.70

normalize_answerFunction · 0.70

no test coverage detected