()
| 188 | |
| 189 | # Get the load information of this node |
def _get_load_info() -> dict:
    """Build the load report for this node.

    Reads the per-dp load values from ``g_objs.shared_token_load`` for the
    dp ranks hosted on this node, averages them, and packs the result
    together with this server's ``ip:port`` string.

    Returns:
        dict: ``{"total_token_usage_rate": <mean load as float>,
        "client_ip_port": "<host_ip>:<port>"}``.

    Raises:
        AssertionError: if ``g_objs.shared_token_load`` is ``None``.
    """
    from lightllm.server.api_http import g_objs

    token_load = g_objs.shared_token_load
    assert token_load is not None, "shared_token_load is not initialized"

    server_args = g_objs.args
    # Number of dp ranks on this node; clamped to 1 so the average below
    # always has at least one sample.
    dp_per_node = max(1, server_args.dp // server_args.nnodes)

    # Current load of every dp rank. Each value is the current usage rate of
    # the total token capacity; it is reported to PD_Master, which uses it
    # for scheduling decisions.
    per_dp_load = []
    for dp_rank in range(dp_per_node):
        per_dp_load.append(float(token_load.get_dynamic_max_load(dp_rank)))

    return {
        "total_token_usage_rate": sum(per_dp_load) / len(per_dp_load),
        "client_ip_port": f"{g_objs.httpserver_manager.host_ip}:{g_objs.args.port}",
    }
no test coverage detected