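Test if a configuration with given num_gpu_experts works. Args: num_gpu_experts: Number of GPU experts to test; model_path: Path to the model; config: Configuration dict with all parameters; verbose: Whether to show detailed logs. Returns: (success: bool, elapsed_time: float), where success is True if the server starts and inference works, and elapsed_time is the time taken for the test.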
(
num_gpu_experts: int,
model_path: Path,
config: dict,
verbose: bool = False,
)
| 85 | |
| 86 | |
| 87 | def test_config( |
| 88 | num_gpu_experts: int, |
| 89 | model_path: Path, |
| 90 | config: dict, |
| 91 | verbose: bool = False, |
| 92 | ) -> tuple[bool, float]: |
| 93 | """ |
| 94 | Test if a configuration with given num_gpu_experts works. |
| 95 | |
| 96 | Args: |
| 97 | num_gpu_experts: Number of GPU experts to test |
| 98 | model_path: Path to the model |
| 99 | config: Configuration dict with all parameters |
| 100 | verbose: Whether to show detailed logs |
| 101 | |
| 102 | Returns: |
| 103 | (success: bool, elapsed_time: float) |
| 104 | - success: True if server starts and inference works |
| 105 | - elapsed_time: Time taken for the test |
| 106 | """ |
| 107 | start_time = time.time() |
| 108 | |
| 109 | # Use random port to avoid conflicts |
| 110 | test_port = random.randint(30000, 40000) |
| 111 | |
| 112 | # Build command |
| 113 | cmd = [ |
| 114 | sys.executable, |
| 115 | "-m", |
| 116 | "sglang.launch_server", |
| 117 | "--model", |
| 118 | str(model_path), |
| 119 | "--port", |
| 120 | str(test_port), |
| 121 | "--host", |
| 122 | "127.0.0.1", |
| 123 | "--tensor-parallel-size", |
| 124 | str(config["tensor_parallel_size"]), |
| 125 | "--kt-num-gpu-experts", |
| 126 | str(num_gpu_experts), |
| 127 | "--max-total-tokens", |
| 128 | str(config["max_total_tokens"]), |
| 129 | ] |
| 130 | |
| 131 | # Add kt-kernel options |
| 132 | if config.get("weights_path"): |
| 133 | cmd.extend(["--kt-weight-path", str(config["weights_path"])]) |
| 134 | else: |
| 135 | cmd.extend(["--kt-weight-path", str(model_path)]) |
| 136 | |
| 137 | cmd.extend( |
| 138 | [ |
| 139 | "--kt-cpuinfer", |
| 140 | str(config.get("cpu_threads", 64)), |
| 141 | "--kt-threadpool-count", |
| 142 | str(config.get("numa_nodes", 2)), |
| 143 | "--kt-method", |
| 144 | config.get("kt_method", "AMXINT4"), |
no test coverage detected