Function _normalize_row

benchmark/benchmark_chat_completion.py:208–232 · view source on GitHub ↗

(
    row: dict[str, Any],
    dataset: str,
    row_index: int,
    tokenizer=None,
)

Source from the content-addressed store, hash-verified

206
207
def _normalize_row(
    row: dict[str, Any],
    dataset: str,
    row_index: int,
    tokenizer=None,
) -> BenchmarkRequest:
    """Convert one raw dataset row into a ``BenchmarkRequest``.

    Args:
        row: Raw dataset record. Its ``'id'`` entry (if present) becomes the
            request id; its ``'image_data'`` entry (if present) is forwarded
            on the tokenized path. Messages are obtained via
            ``_extract_messages(row)``.
        dataset: Dataset name, stored on the request and used to build a
            fallback id.
        row_index: Position of the row in the dataset, used in the fallback
            id (``'<dataset>-<row_index>'``) and in error messages.
        tokenizer: Optional object providing ``apply_chat_template`` and
            ``encode``. When given, the request carries pre-tokenized
            ``input_ids`` instead of ``messages``.

    Returns:
        A request holding ``input_ids`` (plus ``image_data``) when a
        tokenizer is supplied, otherwise a request holding ``messages``.

    Raises:
        ValueError: If no messages could be extracted from the row.
    """
    request_id = str(row.get('id', f'{dataset}-{row_index}'))
    messages = _extract_messages(row)

    # Validate before branching: previously this check only ran on the
    # non-tokenizer path, so an empty message list was handed straight to
    # the chat template when a tokenizer was supplied.
    if not messages:
        raise ValueError(f'row {row_index} in {dataset} has invalid messages')

    if tokenizer is None:
        return BenchmarkRequest(dataset=dataset, id=request_id, messages=messages)

    # Render the conversation to text first, then encode it without special
    # tokens (add_special_tokens=False) so the rendered template text is
    # encoded as-is.
    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    return BenchmarkRequest(
        dataset=dataset,
        id=request_id,
        input_ids=tokenizer.encode(prompt_str, add_special_tokens=False),
        image_data=row.get('image_data'),
    )
233
234
235	def _read_raw_rows(

load_requestsFunction · 0.70

_extract_messagesFunction · 0.85

BenchmarkRequestClass · 0.85

getMethod · 0.45

encodeMethod · 0.45

no test coverage detected