Launch the proxy server. Args: server_name (str): the server name of the proxy. Defaults to '0.0.0.0'. server_port (int): the server port. Defaults to 8000. serving_strategy ('Hybrid' | 'DistServe'): the serving strategy. Defaults to 'Hybrid'; use 'DistServe' for PD Disaggregation.
(server_name: str = '0.0.0.0',
server_port: int = 8000,
serving_strategy: Literal['Hybrid', 'DistServe'] = 'Hybrid',
routing_strategy: Literal['random', 'min_expected_latency', 'min_observed_latency'] = 'min_expected_latency',
api_keys: list[str] | str | None = None,
ssl: bool = False,
log_level: str = 'INFO',
disable_cache_status: bool = False,
link_type: Literal['RoCE', 'IB'] = 'RoCE',
migration_protocol: Literal['RDMA'] = 'RDMA',
dummy_prefill: bool = False,
**kwargs)
| 882 | |
| 883 | |
def proxy(server_name: str = '0.0.0.0',
          server_port: int = 8000,
          serving_strategy: Literal['Hybrid', 'DistServe'] = 'Hybrid',
          routing_strategy: Literal['random', 'min_expected_latency', 'min_observed_latency'] = 'min_expected_latency',
          api_keys: list[str] | str | None = None,
          ssl: bool = False,
          log_level: str = 'INFO',
          disable_cache_status: bool = False,
          link_type: Literal['RoCE', 'IB'] = 'RoCE',
          migration_protocol: Literal['RDMA'] = 'RDMA',
          dummy_prefill: bool = False,
          **kwargs):
    """Launch the proxy server.

    Configures the module-level ``node_manager`` from the arguments, optionally
    installs API-key authentication on ``app``, and then serves ``app`` with
    uvicorn.

    Args:
        server_name (str): the server name of the proxy. Default to '0.0.0.0'.
        server_port (int): the server port. Default to 8000.
        serving_strategy ('Hybrid' | 'DistServe'): the strategy to serving. Hybrid default.
            DistServe for PD Disaggregation.
        routing_strategy ('random' | 'min_expected_latency' | 'min_observed_latency'):
            the strategy to dispatch requests to nodes. Default to
            'min_expected_latency'
        api_keys (list[str] | str | None): Optional list of API keys. Accepts string type as
            a single api_key. Empty keys are ignored. Default to None, which means no api key applied.
        ssl (bool): Enable SSL. Requires OS Environment variables 'SSL_KEYFILE' and 'SSL_CERTFILE'.
        log_level (str): Set the log level of the proxy logger. Default to INFO.
            The uvicorn log level is read separately from the 'UVICORN_LOG_LEVEL'
            environment variable (default 'info').
        disable_cache_status (bool): Whether to disable caching the proxy status to
            proxy_config.yml. Default to False, i.e. the status is cached.
        link_type ('RoCE' | 'IB'): RDMA link type used to build the DistServe RDMA config.
            Default to 'RoCE'.
        migration_protocol: migration protocol when PD disaggregation. RDMA default.
        dummy_prefill (bool): stored on the node manager as ``dummy_prefill``.
            Default to False. NOTE(review): presumably only relevant to DistServe -- confirm.
        **kwargs: accepted for call compatibility; not used by this function.

    Raises:
        KeyError: if ``ssl`` is True and 'SSL_KEYFILE' or 'SSL_CERTFILE' is not set
            in the environment.
    """  # noqa
    node_manager.serving_strategy = ServingStrategy[serving_strategy]
    node_manager.routing_strategy = RoutingStrategy.from_str(routing_strategy)
    node_manager.migration_protocol = MigrationProtocol[migration_protocol]
    node_manager.dummy_prefill = dummy_prefill

    node_manager.rdma_config = DistServeRDMAConfig(
        link_type=RDMALinkType[link_type],
        with_gdr=True,
    )
    node_manager.cache_status = not disable_cache_status

    # A bare string is a single key. Iterating it directly would split it into
    # one-character tokens, so wrap it in a list before filtering.
    key_list = [api_keys] if isinstance(api_keys, str) else api_keys
    if key_list is not None and (tokens := [key for key in key_list if key]):
        from lmdeploy.serve.utils.server_utils import AuthenticationMiddleware

        app.add_middleware(AuthenticationMiddleware, tokens=tokens)

    ssl_keyfile, ssl_certfile = None, None
    if ssl:
        ssl_keyfile = os.environ['SSL_KEYFILE']
        ssl_certfile = os.environ['SSL_CERTFILE']

    logger.setLevel(log_level)
    uvicorn_log_level = os.getenv('UVICORN_LOG_LEVEL', 'info').lower()
    uvicorn.run(app=app,
                host=server_name,
                port=server_port,
                log_level=uvicorn_log_level,
                ssl_keyfile=ssl_keyfile,
                ssl_certfile=ssl_certfile)
no test coverage detected