(parser)
| 460 | |
| 461 | |
| 462 | def add_model_args(parser): |
| 463 | parser.add_argument( |
| 464 | "--model-path", |
| 465 | type=str, |
| 466 | default="lmsys/vicuna-7b-v1.5", |
| 467 | help="The path to the weights. This can be a local folder or a Hugging Face repo ID.", |
| 468 | ) |
| 469 | parser.add_argument( |
| 470 | "--revision", |
| 471 | type=str, |
| 472 | default="main", |
| 473 | help="Hugging Face Hub model revision identifier", |
| 474 | ) |
| 475 | parser.add_argument( |
| 476 | "--device", |
| 477 | type=str, |
| 478 | choices=["cpu", "cuda", "mps", "xpu", "npu"], |
| 479 | default="cuda", |
| 480 | help="The device type", |
| 481 | ) |
| 482 | parser.add_argument( |
| 483 | "--gpus", |
| 484 | type=str, |
| 485 | default=None, |
| 486 | help="A single GPU like 1 or multiple GPUs like 0,2", |
| 487 | ) |
| 488 | parser.add_argument("--num-gpus", type=int, default=1) |
| 489 | parser.add_argument( |
| 490 | "--max-gpu-memory", |
| 491 | type=str, |
| 492 | help="The maximum memory per GPU for storing model weights. Use a string like '13Gib'", |
| 493 | ) |
| 494 | parser.add_argument( |
| 495 | "--dtype", |
| 496 | type=str, |
| 497 | choices=["float32", "float16", "bfloat16"], |
| 498 | help="Override the default dtype. If not set, it will use float16 on GPU and float32 on CPU.", |
| 499 | default=None, |
| 500 | ) |
| 501 | parser.add_argument( |
| 502 | "--load-8bit", action="store_true", help="Use 8-bit quantization" |
| 503 | ) |
| 504 | parser.add_argument( |
| 505 | "--cpu-offloading", |
| 506 | action="store_true", |
| 507 | help="Only when using 8-bit quantization: Offload excess weights to the CPU that don't fit on the GPU", |
| 508 | ) |
| 509 | parser.add_argument( |
| 510 | "--gptq-ckpt", |
| 511 | type=str, |
| 512 | default=None, |
| 513 | help="Used for GPTQ. The path to the local GPTQ checkpoint.", |
| 514 | ) |
| 515 | parser.add_argument( |
| 516 | "--gptq-wbits", |
| 517 | type=int, |
| 518 | default=16, |
| 519 | choices=[2, 3, 4, 8, 16], |
no outgoing calls
no test coverage detected
searching dependent graphs…