()
| 301 | |
| 302 | |
| 303 | def create_model_worker(): |
| 304 | parser = argparse.ArgumentParser() |
| 305 | parser.add_argument("--host", type=str, default="localhost") |
| 306 | parser.add_argument("--port", type=int, default=21002) |
| 307 | parser.add_argument("--worker-address", type=str, default="http://localhost:21002") |
| 308 | parser.add_argument( |
| 309 | "--controller-address", type=str, default="http://localhost:21001" |
| 310 | ) |
| 311 | add_model_args(parser) |
| 312 | parser.add_argument( |
| 313 | "--model-names", |
| 314 | type=lambda s: s.split(","), |
| 315 | help="Optional display comma separated names", |
| 316 | ) |
| 317 | parser.add_argument( |
| 318 | "--conv-template", type=str, default=None, help="Conversation prompt template." |
| 319 | ) |
| 320 | parser.add_argument("--embed-in-truncate", action="store_true") |
| 321 | parser.add_argument( |
| 322 | "--limit-worker-concurrency", |
| 323 | type=int, |
| 324 | default=5, |
| 325 | help="Limit the model concurrency to prevent OOM.", |
| 326 | ) |
| 327 | parser.add_argument("--stream-interval", type=int, default=2) |
| 328 | parser.add_argument("--no-register", action="store_true") |
| 329 | parser.add_argument( |
| 330 | "--seed", |
| 331 | type=int, |
| 332 | default=None, |
| 333 | help="Overwrite the random seed for each generation.", |
| 334 | ) |
| 335 | parser.add_argument( |
| 336 | "--debug", type=bool, default=False, help="Print debugging messages" |
| 337 | ) |
| 338 | parser.add_argument( |
| 339 | "--ssl", |
| 340 | action="store_true", |
| 341 | required=False, |
| 342 | default=False, |
| 343 | help="Enable SSL. Requires OS Environment variables 'SSL_KEYFILE' and 'SSL_CERTFILE'.", |
| 344 | ) |
| 345 | args = parser.parse_args() |
| 346 | logger.info(f"args: {args}") |
| 347 | |
| 348 | if args.gpus: |
| 349 | if len(args.gpus.split(",")) < args.num_gpus: |
| 350 | raise ValueError( |
| 351 | f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!" |
| 352 | ) |
| 353 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus |
| 354 | |
| 355 | gptq_config = GptqConfig( |
| 356 | ckpt=args.gptq_ckpt or args.model_path, |
| 357 | wbits=args.gptq_wbits, |
| 358 | groupsize=args.gptq_groupsize, |
| 359 | act_order=args.gptq_act_order, |
| 360 | ) |
no test coverage detected
searching dependent graphs…