(self,
config_path: str | None = None,
serving_strategy: str = 'Hybrid',
routing_strategy: str = 'min_expected_latency',
migration_protocol: str = 'RDMA',
link_type: str = 'RoCE',
with_gdr: bool = True,
cache_status: bool = True)
| 84 | """ |
| 85 | |
def __init__(self,
             config_path: str | None = None,
             serving_strategy: str = 'Hybrid',
             routing_strategy: str = 'min_expected_latency',
             migration_protocol: str = 'RDMA',
             link_type: str = 'RoCE',
             with_gdr: bool = True,
             cache_status: bool = True) -> None:
    """Set up routing state, optionally restore cached nodes, then start the heartbeat thread.

    Args:
        config_path: Path of the JSON node-configuration file. When ``None``,
            ``proxy_config.json`` located next to this module is used.
        serving_strategy: Key looked up in ``ServingStrategy``.
        routing_strategy: String handed to ``RoutingStrategy.from_str``.
        migration_protocol: Key looked up in ``MigrationProtocol``.
        link_type: Key looked up in ``RDMALinkType``.
        with_gdr: Forwarded unchanged to ``DistServeRDMAConfig``.
        cache_status: When true, nodes are restored from the configuration file,
            provided the file exists and is not empty.

    NOTE(review): the three ``X[name]`` lookups presumably target Enum classes and
    would raise ``KeyError`` for an unknown name -- confirm against their definitions.
    """
    self.nodes = {}
    self.serving_strategy = ServingStrategy[serving_strategy]
    self.routing_strategy = RoutingStrategy.from_str(routing_strategy)

    self.cache_status = cache_status
    self.latencies = {}
    if config_path is None:
        config_path = osp.join(osp.dirname(osp.realpath(__file__)), 'proxy_config.json')
    self.config_path = config_path

    # An empty file is not valid JSON, so it is treated the same as a missing cache.
    if self.cache_status and osp.exists(self.config_path) and osp.getsize(self.config_path) > 0:
        logger.info(f'loading node configuration: {self.config_path}')
        with open(self.config_path) as config_file:
            config = json.load(config_file)
        self.nodes = {
            node_url: Status.model_validate_json(node_status)
            for node_url, node_status in config.items()
        }
    self.aiotimeout = aiohttp.ClientTimeout(total=AIOHTTP_TIMEOUT)

    # For PD Disaggregation
    self.migration_protocol = MigrationProtocol[migration_protocol]
    self.rdma_config = DistServeRDMAConfig(with_gdr=with_gdr, link_type=RDMALinkType[link_type])
    self.pd_connection_pool = PDConnectionPool()
    self.dummy_prefill = False

    # Start the heartbeat thread last: it receives ``self``, so every attribute must
    # already be assigned, and no thread is left running if a lookup above raises.
    self.heart_beat_thread = threading.Thread(target=heart_beat_controller, args=(self, ), daemon=True)
    self.heart_beat_thread.start()
| 121 | |
| 122 | def get_nodes(self, role: EngineRole) -> dict[str, Status]: |
| 123 | items = list(self.nodes.items()) |
nothing calls this directly
no test coverage detected