MCPcopy
hub / github.com/kvcache-ai/ktransformers / _run_impl

Function _run_impl

kt-kernel/python/cli/commands/run.py:170–579  ·  view source on GitHub ↗

Actual implementation of the `run` command.

(
    model: Optional[str],
    host: Optional[str],
    port: Optional[int],
    gpu_experts: Optional[int],
    cpu_threads: Optional[int],
    numa_nodes: Optional[tuple[int, ...]],
    tensor_parallel_size: Optional[int],
    model_path: Optional[Path],
    weights_path: Optional[Path],
    kt_method: Optional[str],
    kt_gpu_prefill_threshold: Optional[int],
    attention_backend: Optional[str],
    max_total_tokens: Optional[int],
    max_running_requests: Optional[int],
    chunked_prefill_size: Optional[int],
    mem_fraction_static: Optional[float],
    watchdog_timeout: Optional[int],
    served_model_name: Optional[str],
    disable_shared_experts_fusion: Optional[bool],
    quantize: bool,
    advanced: bool,
    dry_run: bool,
    extra_cli_args: list[str],
)

Source from the content-addressed store, hash-verified

168
169
170def _run_impl(
171 model: Optional[str],
172 host: Optional[str],
173 port: Optional[int],
174 gpu_experts: Optional[int],
175 cpu_threads: Optional[int],
176 numa_nodes: Optional[tuple[int, ...]],
177 tensor_parallel_size: Optional[int],
178 model_path: Optional[Path],
179 weights_path: Optional[Path],
180 kt_method: Optional[str],
181 kt_gpu_prefill_threshold: Optional[int],
182 attention_backend: Optional[str],
183 max_total_tokens: Optional[int],
184 max_running_requests: Optional[int],
185 chunked_prefill_size: Optional[int],
186 mem_fraction_static: Optional[float],
187 watchdog_timeout: Optional[int],
188 served_model_name: Optional[str],
189 disable_shared_experts_fusion: Optional[bool],
190 quantize: bool,
191 advanced: bool,
192 dry_run: bool,
193 extra_cli_args: list[str],
194) -> None:
195 """Actual implementation of run command."""
196 # Check if SGLang is installed before proceeding
197 from kt_kernel.cli.utils.sglang_checker import (
198 check_sglang_installation,
199 check_sglang_kt_kernel_support,
200 print_sglang_install_instructions,
201 print_sglang_kt_kernel_instructions,
202 )
203
204 sglang_info = check_sglang_installation()
205 if not sglang_info["installed"]:
206 console.print()
207 print_error(t("sglang_not_found"))
208 console.print()
209 print_sglang_install_instructions()
210 raise typer.Exit(1)
211
212 # Check if SGLang supports kt-kernel (has --kt-gpu-prefill-token-threshold parameter)
213 kt_kernel_support = check_sglang_kt_kernel_support()
214 if not kt_kernel_support["supported"]:
215 console.print()
216 print_error(t("sglang_kt_kernel_not_supported"))
217 console.print()
218 print_sglang_kt_kernel_instructions()
219 raise typer.Exit(1)
220
221 settings = get_settings()
222 user_registry = UserModelRegistry()
223
224 # Check if we should use interactive mode
225 # Interactive mode triggers when:
226 # 1. No model specified, OR
227 # 2. Model specified but missing critical parameters (gpu_experts, tensor_parallel_size, etc.)

Callers 1

run · Function · 0.85

Calls 15

find_by_path · Method · 0.95
get_model · Method · 0.95
list_models · Method · 0.95
print_error · Function · 0.90
t · Function · 0.90
get_settings · Function · 0.90
UserModelRegistry · Class · 0.90
interactive_run_config · Function · 0.90

Tested by

no test coverage detected