()
| 222 | prepare_model() |
| 223 | |
def parse_args():
    """Parse the command-line options of the environment setup script.

    The allowed values for ``--quant-type`` are looked up in
    ``SUPPORTED_QUANT_TYPES`` using the architecture reported by
    ``system_info()``; the allowed values for ``--hf-repo`` are the keys of
    ``SUPPORTED_HF_MODELS``.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments, with
        attributes ``hf_repo``, ``model_dir``, ``log_dir``, ``quant_type``,
        ``quant_embd`` and ``use_pretuned``.
    """
    # Only the second element (the architecture) is needed here.
    _unused, arch = system_info()

    cli = argparse.ArgumentParser(
        description='Setup the environment for running the inference',
    )

    # Model selection and on-disk locations.
    cli.add_argument(
        "--hf-repo", "-hr",
        type=str,
        choices=SUPPORTED_HF_MODELS.keys(),
        help="Model used for inference",
    )
    cli.add_argument(
        "--model-dir", "-md",
        type=str,
        default="models",
        help="Directory to save/load the model",
    )
    cli.add_argument(
        "--log-dir", "-ld",
        type=str,
        default="logs",
        help="Directory to save the logging info",
    )

    # Quantization settings; valid types depend on the host architecture.
    cli.add_argument(
        "--quant-type", "-q",
        type=str,
        choices=SUPPORTED_QUANT_TYPES[arch],
        default="i2_s",
        help="Quantization type",
    )
    cli.add_argument(
        "--quant-embd",
        action="store_true",
        help="Quantize the embeddings to f16",
    )

    # Kernel tuning.
    cli.add_argument(
        "--use-pretuned", "-p",
        action="store_true",
        help="Use the pretuned kernel parameters",
    )

    return cli.parse_args()
| 234 | |
| 235 | def signal_handler(sig, frame): |
| 236 | logging.info("Ctrl+C pressed, exiting...") |
no test coverage detected