(self, args)
| 73 | shared_token_load: TokenLoad = None |
| 74 | |
def set_args(self, args):
    """Record the parsed server arguments and build the objects derived from them.

    Steps, in order:
      1. Store ``args`` on the instance.
      2. Pick the generate / generate-stream handlers: the TGI implementations
         when ``args.use_tgi_api`` is truthy, the lightllm ones otherwise.
      3. Set the process title.
      4. Create the metric client and the HTTP server manager; which manager
         class is used depends on ``args.run_mode``.

    Args:
        args: parsed server arguments. The attributes read here are
            ``use_tgi_api``, ``run_mode``, ``metric_port`` and, outside
            ``pd_master`` mode, ``model_dir``, ``router_port``, ``cache_port``,
            ``detokenization_pub_port``, ``visual_port``, ``enable_multimodal``,
            ``dp`` and ``nnodes``.
    """
    self.args = args

    # Imported at call time rather than at module import time.
    from .api_lightllm import lightllm_generate, lightllm_generate_stream
    from .api_tgi import tgi_generate_impl, tgi_generate_stream_impl

    if args.use_tgi_api:
        handlers = (tgi_generate_impl, tgi_generate_stream_impl)
    else:
        handlers = (lightllm_generate, lightllm_generate_stream)
    self.g_generate_func, self.g_generate_stream_func = handlers

    setproctitle.setproctitle(f"lightllm::{get_unique_server_name()}::api_server")

    if args.run_mode == "pd_master":
        self.metric_client = MetricClient(args.metric_port)
        self.httpserver_manager = HttpServerManagerForPDMaster(args, metric_port=args.metric_port)
        return

    init_tokenizer(args)  # for openai api
    SamplingParams.load_generation_cfg(args.model_dir)
    self.metric_client = MetricClient(args.metric_port)
    self.httpserver_manager = HttpServerManager(
        args,
        router_port=args.router_port,
        cache_port=args.cache_port,
        detokenization_pub_port=args.detokenization_pub_port,
        visual_port=args.visual_port,
        enable_multimodal=args.enable_multimodal,
        metric_port=args.metric_port,
    )

    # NOTE(review): the listing this was taken from had lost its indentation;
    # the two statements below are assumed to belong to the non-pd_master
    # path only -- confirm against the original file.
    # Multi-node pure-TP runs give dp // nnodes == 0 (e.g. 1 // 2), so the
    # per-node dp size is clamped to at least 1.
    node_dp_size = max(1, args.dp // args.nnodes)
    self.shared_token_load = TokenLoad(f"{get_unique_server_name()}_shared_token_load", node_dp_size)
| 110 | |
| 111 | |
# Module-level G_Objs instance; its fields are filled in later by set_args().
g_objs = G_Objs()
no test coverage detected