MCPcopy
hub / github.com/InternLM/lmdeploy / load_requests

Function load_requests

benchmark/benchmark_chat_completion.py:273–302  ·  view source on GitHub ↗

Load JSONL chat requests. Rows with list-type ``prompt`` (e.g. dapo-math-17k) are treated as message lists, matching ``benchmark_generate.py``. When ``tokenizer`` is provided (``--input-ids``), rows are converted to ``input_ids`` client-side.

(
    dataset_dir: str | Path | None = None,
    dataset_files: Sequence[str | Path] | None = None,
    datasets: Sequence[str] | None = None,
    num_prompts: int | None = None,
    shuffle: bool = False,
    seed: int = 1,
    tokenizer=None,
)

Source from the content-addressed store, hash-verified

271
272
def load_requests(
    dataset_dir: str | Path | None = None,
    dataset_files: Sequence[str | Path] | None = None,
    datasets: Sequence[str] | None = None,
    num_prompts: int | None = None,
    shuffle: bool = False,
    seed: int = 1,
    tokenizer=None,
) -> list[BenchmarkRequest]:
    """Read JSONL rows and turn them into benchmark chat requests.

    A row whose ``prompt`` is a list (e.g. dapo-math-17k) is handled as a list of
    messages, the same way ``benchmark_generate.py`` does it. Passing a ``tokenizer``
    (the ``--input-ids`` mode) makes the rows get converted to ``input_ids`` on the
    client side.

    Args:
        dataset_dir: Forwarded unchanged to ``_read_raw_rows``.
        dataset_files: Forwarded unchanged to ``_read_raw_rows``.
        datasets: Forwarded unchanged to ``_read_raw_rows``.
        num_prompts: When not ``None``, only the first ``num_prompts`` rows
            (after any shuffling) are kept. Also forwarded to ``_read_raw_rows``.
        shuffle: When true, the rows are shuffled in place with a private
            ``random.Random(seed)`` instance. Also forwarded to ``_read_raw_rows``.
        seed: Seed for the shuffle generator; only used when ``shuffle`` is true.
        tokenizer: Handed to ``_normalize_row`` for every row.

    Returns:
        One normalized request per selected row, in row order.

    Raises:
        ValueError: If no rows remain after loading, shuffling and truncation.
    """
    loaded = _read_raw_rows(
        dataset_dir=dataset_dir,
        dataset_files=dataset_files,
        datasets=datasets,
        num_prompts=num_prompts,
        shuffle=shuffle,
    )

    if shuffle:
        # A dedicated generator keeps the ordering reproducible for a given
        # seed without touching the module-level random state.
        rng = random.Random(seed)
        rng.shuffle(loaded)

    selected = loaded if num_prompts is None else loaded[:num_prompts]
    if not selected:
        raise ValueError('No benchmark requests were loaded.')

    requests = []
    for row, dataset, row_index in selected:
        requests.append(_normalize_row(row, dataset, row_index, tokenizer))
    return requests
303
304
305def parse_sse_line(line: bytes | str) -> SSEEvent:

Callers 1

run_benchmark — Function · 0.70

Calls 2

_read_raw_rows — Function · 0.85
_normalize_row — Function · 0.70

Tested by

no test coverage detected