MCPcopy Index your code
hub / github.com/microsoft/BitNet / prepare_model

Function prepare_model

setup_env.py:109–150  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

107 sys.exit(1)
108
def prepare_model():
    """Ensure a GGUF model file exists for the configured quantization type.

    Reads ``hf_repo``, ``model_dir``, ``quant_type`` and ``quant_embd`` from
    the module-level ``args``.  When ``hf_repo`` is set, the model is
    downloaded with ``huggingface-cli`` into a sub-directory of ``model_dir``
    named after the matching ``SUPPORTED_HF_MODELS`` entry; otherwise
    ``model_dir`` must already exist, or the process exits with status 1.

    If ``ggml-model-<quant_type>.gguf`` is missing or empty in the model
    directory, the HF checkpoint is converted: quant types starting with
    ``"tl"`` are emitted directly by the conversion script, any other type is
    first converted to f32 and then quantized to I2_S with ``llama-quantize``.
    """
    # The result is not used below; the call is kept so behavior is unchanged.
    _os_name, _arch = system_info()

    repo_id = args.hf_repo
    target_dir = args.model_dir
    qtype = args.quant_type
    keep_f16_embeddings = args.quant_embd

    if repo_id is None:
        # Local model: the directory has to be there already.
        if not os.path.exists(target_dir):
            logging.error(f"Model directory {target_dir} does not exist.")
            sys.exit(1)
        logging.info(f"Loading model from directory {target_dir}.")
    else:
        # Remote model: fetch it into <model_dir>/<model_name>.
        target_dir = os.path.join(target_dir, SUPPORTED_HF_MODELS[repo_id]["model_name"])
        Path(target_dir).mkdir(parents=True, exist_ok=True)
        logging.info(f"Downloading model {repo_id} from HuggingFace to {target_dir}...")
        run_command(
            ["huggingface-cli", "download", repo_id, "--local-dir", target_dir],
            log_step="download_model",
        )

    gguf_file = os.path.join(target_dir, "ggml-model-" + qtype + ".gguf")

    # A non-empty file from an earlier run is reused as-is.
    if os.path.exists(gguf_file) and os.path.getsize(gguf_file) != 0:
        logging.info(f"GGUF model already exists at {gguf_file}")
        return

    logging.info("Converting HF model to GGUF format...")
    converter = "utils/convert-hf-to-gguf-bitnet.py"

    if qtype.startswith("tl"):
        # NOTE(review): --quant-embd is passed unconditionally here,
        # independent of args.quant_embd -- confirm this is intended.
        run_command(
            [sys.executable, converter, target_dir, "--outtype", qtype, "--quant-embd"],
            log_step="convert_to_tl",
        )
    else:
        # i2s path: convert to f32 first, then quantize that file.
        run_command(
            [sys.executable, converter, target_dir, "--outtype", "f32"],
            log_step="convert_to_f32_gguf",
        )
        f32_file = os.path.join(target_dir, "ggml-model-f32.gguf")
        # NOTE(review): the output name is fixed to i2_s while the existence
        # check above uses quant_type -- verify the two always agree.
        i2s_file = os.path.join(target_dir, "ggml-model-i2_s.gguf")

        # On Windows the binary is looked up under build/bin/Release.
        if platform.system() != "Windows":
            quantize_bin = "./build/bin/llama-quantize"
        else:
            quantize_bin = "./build/bin/Release/llama-quantize"

        if keep_f16_embeddings:
            quantize_cmd = [
                quantize_bin,
                "--token-embedding-type", "f16",
                f32_file, i2s_file, "I2_S", "1", "1",
            ]
        else:
            quantize_cmd = [quantize_bin, f32_file, i2s_file, "I2_S", "1"]
        run_command(quantize_cmd, log_step="quantize_to_i2s")

    logging.info(f"GGUF model saved at {gguf_file}")
151
152def setup_gguf():
153 # Install the pip package

Callers 1

mainFunction · 0.85

Calls 2

system_infoFunction · 0.85
run_commandFunction · 0.70

Tested by

no test coverage detected