(model: Optional[str] = None)
| 16 | |
| 17 | |
def run_MTP(model: Optional[str] = None):
    """Run the prompts through an LLM configured with MTP speculative decoding.

    For every entry in the module-level ``prompts`` collection, requests a
    generation capped at 10 tokens and prints the text of the first output.

    Args:
        model: Value forwarded to ``LLM`` as ``model``. When it is falsy
            (``None`` or an empty string), ``"nvidia/DeepSeek-R1-FP4"`` is
            used instead.
    """
    mtp_config = MTPDecodingConfig(
        num_nextn_predict_layers=1,
        use_relaxed_acceptance_for_thinking=True,
        relaxed_topk=10,
        relaxed_delta=0.01,
    )

    # You can change this to a local model path if you have the model downloaded
    model_name = model or "nvidia/DeepSeek-R1-FP4"
    engine = LLM(model=model_name, speculative_config=mtp_config)

    # `prompts` is not a local; it is looked up from the enclosing module.
    for text in prompts:
        # A fresh SamplingParams is built for each request, as in the original.
        result = engine.generate(text, SamplingParams(max_tokens=10))
        first_output = result.outputs[0]
        print(first_output.text)
| 33 | |
| 34 | |
| 35 | def run_Eagle3(): |
no test coverage detected