Builds the Serve app based on CLI arguments. See https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server for the complete set of arguments. Supported engine arguments: https://docs.vllm.ai/en/latest/models/engine_args.html.
(cli_args: Dict[str, str])
| 131 | |
| 132 | |
def build_app(cli_args: Dict[str, str]) -> serve.Application:
    """Builds the Serve app based on CLI arguments.

    See https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server
    for the complete set of arguments.

    Supported engine arguments: https://docs.vllm.ai/en/latest/models/engine_args.html.

    Args:
        cli_args: CLI-style options keyed by name; they are parsed with
            ``parse_vllm_args``. The optional ``"request_logger"`` entry is
            passed through to the deployment as-is.

    Returns:
        ``VLLMDeployment`` bound to the parsed arguments, configured with one
        replica per group of ``tensor_parallel_size`` GPUs that Ray currently
        reports as available.

    Raises:
        ValueError: If ``tensor_parallel_size`` is smaller than 1, or if fewer
            GPUs are available than a single replica needs.
    """  # noqa: E501
    parsed_args = parse_vllm_args(cli_args)
    engine_args = AsyncEngineArgs.from_cli_args(parsed_args)
    engine_args.worker_use_ray = True

    tp = engine_args.tensor_parallel_size
    if tp < 1:
        raise ValueError(f"tensor_parallel_size must be at least 1, got {tp}")
    logger.info("Tensor parallelism = %s", tp)
    # The full argument dump is verbose; emit it only at debug level instead
    # of printing it unconditionally to stdout.
    logger.debug(
        "tp=%r, parsed_args=%r, engine_args=%r", tp, parsed_args, engine_args
    )

    # One bundle for the deployment replica, then one per vLLM actor.
    pg_resources = [{"CPU": 1}]
    pg_resources.extend({"CPU": 1, "GPU": 1} for _ in range(tp))

    if not ray.is_initialized():
        ray.init()

    # The "GPU" key is missing when no GPU is free, so do not index it directly.
    available_gpus = ray.available_resources().get("GPU", 0)
    # Resource quantities may be floats; a replica count must be a whole number.
    num_replicas = int(available_gpus // tp)
    if num_replicas < 1:
        raise ValueError(
            f"Not enough GPUs available for one replica: "
            f"tensor_parallel_size={tp}, available GPUs={available_gpus}"
        )

    # We use the "STRICT_PACK" strategy below to ensure all vLLM actors are
    # placed on the same Ray node.
    return VLLMDeployment.options(
        num_replicas=num_replicas,
        placement_group_bundles=pg_resources,
        placement_group_strategy="STRICT_PACK",
    ).bind(
        engine_args,
        parsed_args.response_role,
        parsed_args.lora_modules,
        parsed_args.prompt_adapters,
        cli_args.get("request_logger"),
        parsed_args.chat_template,
    )
nothing calls this directly
no test coverage detected