MCPcopy
hub / github.com/InternLM/lmdeploy / _normalize_row

Function _normalize_row

benchmark/benchmark_chat_completion.py:208–232  ·  view source on GitHub ↗
(
    row: dict[str, Any],
    dataset: str,
    row_index: int,
    tokenizer=None,
)

Source from the content-addressed store, hash-verified

206
207
def _normalize_row(
    row: dict[str, Any],
    dataset: str,
    row_index: int,
    tokenizer=None,
) -> BenchmarkRequest:
    """Convert one raw dataset row into a ``BenchmarkRequest``.

    The request id is the row's ``'id'`` value converted to ``str``; when the
    row has no ``'id'`` key, ``'{dataset}-{row_index}'`` is used instead.

    Args:
        row: Raw row to convert.
        dataset: Dataset name; stored on the request and used in the
            fallback id and in error messages.
        row_index: Index of the row; used in the fallback id and in error
            messages.
        tokenizer: Optional object providing ``apply_chat_template`` and
            ``encode``. When given, the messages are rendered with the chat
            template (generation prompt appended) and encoded without extra
            special tokens, and the request carries ``input_ids`` plus the
            row's ``'image_data'`` value (``None`` if absent) instead of the
            messages.

    Returns:
        A request holding ``messages`` when no tokenizer is given, otherwise
        one holding ``input_ids`` and ``image_data``.

    Raises:
        ValueError: If ``_extract_messages`` returns a falsy value for the
            row.
    """
    request_id = str(row.get('id', f'{dataset}-{row_index}'))
    messages = _extract_messages(row)

    # Validate before branching: previously only the no-tokenizer path checked
    # this, so with a tokenizer an invalid row was silently rendered into a
    # prompt containing nothing but chat-template scaffolding.
    if not messages:
        raise ValueError(f'row {row_index} in {dataset} has invalid messages')

    if tokenizer is None:
        return BenchmarkRequest(dataset=dataset, id=request_id, messages=messages)

    prompt_str = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    return BenchmarkRequest(
        dataset=dataset,
        id=request_id,
        # The rendered template already contains its own special tokens, so
        # encoding must not add them a second time.
        input_ids=tokenizer.encode(prompt_str, add_special_tokens=False),
        image_data=row.get('image_data'),
    )
233
234
235def _read_raw_rows(

Callers 1

load_requestsFunction · 0.70

Calls 4

_extract_messagesFunction · 0.85
BenchmarkRequestClass · 0.85
getMethod · 0.45
encodeMethod · 0.45

Tested by

no test coverage detected