(ctx, dataset_path: Optional[str], num_samples: int,
num_fewshot: int, random_seed: int, apply_chat_template: bool,
chat_template_kwargs: Optional[dict[str, Any]],
system_prompt: Optional[str], max_input_length: int,
max_output_length: int, check_accuracy: bool,
accuracy_threshold: float)
| 305 | @click.pass_context |
| 306 | @staticmethod |
def command(ctx, dataset_path: Optional[str], num_samples: int,
            num_fewshot: int, random_seed: int, apply_chat_template: bool,
            chat_template_kwargs: Optional[dict[str, Any]],
            system_prompt: Optional[str], max_input_length: int,
            max_output_length: int, check_accuracy: bool,
            accuracy_threshold: float) -> None:
    # Run the MMLU evaluation on the LLM stored in ``ctx.obj`` and optionally
    # gate on the resulting accuracy.
    #
    # Parameters:
    #   ctx: click context; ``ctx.obj`` must hold the LLM instance to evaluate.
    #   dataset_path, num_samples, num_fewshot, random_seed,
    #   apply_chat_template, system_prompt, chat_template_kwargs:
    #       forwarded unchanged to the ``MMLU`` evaluator.
    #   max_input_length: passed as ``truncate_prompt_tokens`` to SamplingParams.
    #   max_output_length: passed as ``max_tokens`` to SamplingParams.
    #   check_accuracy: when true, fail if accuracy is below the threshold.
    #   accuracy_threshold: minimum accepted accuracy for ``check_accuracy``.
    #
    # Raises:
    #   AssertionError: ``check_accuracy`` is set and the measured accuracy is
    #       not >= ``accuracy_threshold``.
    #
    # NOTE(review): documented with comments instead of a docstring because
    # click can surface a command's docstring as ``--help`` text; the command
    # decorator is outside this view -- confirm before converting.
    llm: Union[LLM, PyTorchLLM] = ctx.obj
    try:
        sampling_params = SamplingParams(
            max_tokens=max_output_length,
            truncate_prompt_tokens=max_input_length)
        evaluator = MMLU(dataset_path,
                         num_samples=num_samples,
                         num_fewshot=num_fewshot,
                         random_seed=random_seed,
                         apply_chat_template=apply_chat_template,
                         system_prompt=system_prompt,
                         chat_template_kwargs=chat_template_kwargs)
        accuracy = evaluator.evaluate(llm, sampling_params)
    finally:
        # Always release the LLM, even when setup or evaluation raises;
        # otherwise the shutdown call would be skipped on failure.
        llm.shutdown()

    if check_accuracy:
        logger.warning(
            "The --check_accuracy flag is not expected to be used anymore. "
            "It is being used by some legacy accuracy tests that call evaluation commands via subprocess. "
            "New accuracy tests should use LLM API within the pytest process; please see `tests/integration/defs/accuracy/README.md`."
        )
        # Explicit raise instead of ``assert`` so the gate still runs under
        # ``python -O``. The negated ``>=`` keeps a NaN accuracy failing the
        # check, exactly like the former assert.
        if not (accuracy >= accuracy_threshold):
            raise AssertionError(
                f"Expected accuracy >= {accuracy_threshold}, but got {accuracy}."
            )
nothing calls this directly
no test coverage detected