()
| 107 | sys.exit(1) |
| 108 | |
def prepare_model():
    """Optionally download the model, then convert it to a quantized GGUF file.

    Configuration is read from the module-level ``args``:

    * ``args.hf_repo`` -- when not ``None``, the repo is downloaded with
      ``huggingface-cli`` into ``<args.model_dir>/<model_name>``, where
      ``model_name`` is looked up in ``SUPPORTED_HF_MODELS``.
    * ``args.model_dir`` -- used as-is when no repo is given; the process
      exits with status 1 if the directory does not exist.
    * ``args.quant_type`` -- selects the target file
      ``ggml-model-<quant_type>.gguf`` and the conversion route: types
      starting with ``"tl"`` are produced directly by the conversion script,
      anything else goes through an f32 GGUF followed by ``llama-quantize``
      to I2_S.
    * ``args.quant_embd`` -- on the I2_S route, adds
      ``--token-embedding-type f16`` (and an extra trailing ``"1"``) to the
      ``llama-quantize`` command line.

    Conversion is skipped when the target GGUF file already exists and is
    non-empty.
    """
    # The architecture reported by system_info() is not needed here; the call
    # itself is kept so that any error it raises still surfaces at this point.
    system_info()

    hf_url = args.hf_repo
    model_dir = args.model_dir
    quant_type = args.quant_type
    quant_embd = args.quant_embd

    if hf_url is not None:
        # Download the model into a per-model sub-directory.
        model_dir = os.path.join(model_dir, SUPPORTED_HF_MODELS[hf_url]["model_name"])
        Path(model_dir).mkdir(parents=True, exist_ok=True)
        logging.info(f"Downloading model {hf_url} from HuggingFace to {model_dir}...")
        run_command(
            ["huggingface-cli", "download", hf_url, "--local-dir", model_dir],
            log_step="download_model",
        )
    elif not os.path.exists(model_dir):
        logging.error(f"Model directory {model_dir} does not exist.")
        sys.exit(1)
    else:
        logging.info(f"Loading model from directory {model_dir}.")

    gguf_path = os.path.join(model_dir, "ggml-model-" + quant_type + ".gguf")

    # Nothing to do when a non-empty GGUF file is already in place.
    if os.path.exists(gguf_path) and os.path.getsize(gguf_path) > 0:
        logging.info(f"GGUF model already exists at {gguf_path}")
        return

    logging.info("Converting HF model to GGUF format...")
    convert_script = "utils/convert-hf-to-gguf-bitnet.py"

    if quant_type.startswith("tl"):
        run_command(
            [sys.executable, convert_script, model_dir, "--outtype", quant_type, "--quant-embd"],
            log_step="convert_to_tl",
        )
    else:  # i2s
        # Step 1: convert to an intermediate f32 GGUF file.
        run_command(
            [sys.executable, convert_script, model_dir, "--outtype", "f32"],
            log_step="convert_to_f32_gguf",
        )
        f32_model = os.path.join(model_dir, "ggml-model-f32.gguf")
        i2s_model = os.path.join(model_dir, "ggml-model-i2_s.gguf")

        # Step 2: quantize to I2_S. Only the binary location differs between
        # platforms, so the command line is assembled once.
        if platform.system() != "Windows":
            quantize_bin = "./build/bin/llama-quantize"
        else:
            quantize_bin = "./build/bin/Release/llama-quantize"

        quantize_cmd = [quantize_bin]
        if quant_embd:
            quantize_cmd += ["--token-embedding-type", "f16", f32_model, i2s_model, "I2_S", "1", "1"]
        else:
            quantize_cmd += [f32_model, i2s_model, "I2_S", "1"]
        run_command(quantize_cmd, log_step="quantize_to_i2s")

    # Only report success when the expected output is actually on disk.
    if os.path.exists(gguf_path) and os.path.getsize(gguf_path) > 0:
        logging.info(f"GGUF model saved at {gguf_path}")
    else:
        logging.warning(f"Conversion finished but no GGUF model was found at {gguf_path}")
| 151 | |
| 152 | def setup_gguf(): |
| 153 | # Install the pip package |
no test coverage detected