(ctx, dataset_path: str, prompts_dir: Optional[str],
num_samples: Optional[int], start_idx: int,
difficulty: Optional[str], length: str, domain: Optional[str],
cot: bool, no_context: bool, rag: int,
output_dir: Optional[str], random_seed: int,
apply_chat_template: bool, system_prompt: Optional[str],
max_input_length: int, max_output_length: int,
chat_template_kwargs: Optional[dict[str, Any]],
temperature: float, top_p: float)
@click.pass_context
@staticmethod
def command(ctx, dataset_path: str, prompts_dir: Optional[str],
            num_samples: Optional[int], start_idx: int,
            difficulty: Optional[str], length: str, domain: Optional[str],
            cot: bool, no_context: bool, rag: int,
            output_dir: Optional[str], random_seed: int,
            apply_chat_template: bool, system_prompt: Optional[str],
            max_input_length: int, max_output_length: int,
            chat_template_kwargs: Optional[dict[str, Any]],
            temperature: float, top_p: float) -> None:
    # Click callback that runs a LongBenchV2 evaluation against the LLM
    # stored on the Click context (``ctx.obj``).
    #
    # ``max_output_length``, ``temperature`` and ``top_p`` are used to build
    # the SamplingParams; every other argument is forwarded unchanged to the
    # LongBenchV2 constructor under the same keyword name.
    #
    # NOTE: documented with comments instead of a docstring on purpose --
    # Click falls back to a callback's docstring for the command's ``--help``
    # text, so adding one here could change user-visible CLI output.
    llm: Union[LLM, PyTorchLLM] = ctx.obj

    # Everything after the LLM has been obtained runs under try/finally so the
    # LLM is shut down even if parameter construction, evaluator construction
    # or the evaluation itself raises.
    try:
        sampling_params = SamplingParams(max_tokens=max_output_length,
                                         temperature=temperature,
                                         top_p=top_p)

        evaluator = LongBenchV2(dataset_path=dataset_path,
                                prompts_dir=prompts_dir,
                                num_samples=num_samples,
                                start_idx=start_idx,
                                difficulty=difficulty,
                                length=length,
                                domain=domain,
                                cot=cot,
                                no_context=no_context,
                                rag=rag,
                                max_input_length=max_input_length,
                                output_dir=output_dir,
                                random_seed=random_seed,
                                apply_chat_template=apply_chat_template,
                                system_prompt=system_prompt,
                                chat_template_kwargs=chat_template_kwargs)

        evaluator.evaluate(llm, sampling_params)
    finally:
        llm.shutdown()
nothing calls this directly
no test coverage detected