(
row: dict[str, Any],
dataset: str,
row_index: int,
tokenizer=None,
)
| 206 | |
| 207 | |
def _normalize_row(
    row: dict[str, Any],
    dataset: str,
    row_index: int,
    tokenizer=None,
) -> BenchmarkRequest:
    """Convert one raw dataset row into a ``BenchmarkRequest``.

    The request id is the row's ``'id'`` value converted to ``str``; when the
    row has no ``'id'`` key it falls back to ``'<dataset>-<row_index>'``.

    Args:
        row: Raw dataset row. Its messages are obtained via
            ``_extract_messages``; ``'id'`` and ``'image_data'`` are read
            when present.
        dataset: Dataset name, stored on the request and used in the
            fallback id and in the error message.
        row_index: Position of the row, used in the fallback id and in the
            error message.
        tokenizer: Optional object exposing ``apply_chat_template`` and
            ``encode``. When given, the messages are rendered to a prompt
            string and encoded, and the request carries ``input_ids`` (plus
            the row's ``'image_data'``, if any) instead of raw messages.

    Returns:
        A ``BenchmarkRequest`` holding either ``messages`` (no tokenizer) or
        ``input_ids`` and ``image_data`` (tokenizer supplied).

    Raises:
        ValueError: If ``_extract_messages`` yields a falsy value for the row.
    """
    request_id = str(row.get('id', f'{dataset}-{row_index}'))
    messages = _extract_messages(row)

    # Validate before branching: previously only the no-tokenizer path was
    # checked, so an invalid row was handed to the chat template unchecked.
    if not messages:
        raise ValueError(f'row {row_index} in {dataset} has invalid messages')

    if tokenizer is None:
        return BenchmarkRequest(dataset=dataset, id=request_id, messages=messages)

    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    return BenchmarkRequest(
        dataset=dataset,
        id=request_id,
        # NOTE(review): add_special_tokens=False presumably avoids duplicating
        # special tokens already emitted by the chat template -- confirm.
        input_ids=tokenizer.encode(prompt_str, add_special_tokens=False),
        image_data=row.get('image_data'),
    )
| 233 | |
| 234 | |
| 235 | def _read_raw_rows( |
no test coverage detected