hub / github.com/microsoft/BitNet / prepare_model

Function prepare_model

setup_env.py:109–150 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

107	sys.exit(1)
108
def prepare_model():
    """Download (optionally) a model and make sure its quantized GGUF file exists.

    All inputs come from the module-level ``args`` namespace:

    * ``args.hf_repo``    -- HuggingFace repo id, or ``None`` to use a local directory.
    * ``args.model_dir``  -- base directory (with a repo) or the model directory itself.
    * ``args.quant_type`` -- quantization type; also part of the GGUF file name.
    * ``args.quant_embd`` -- on the non-``tl`` path, adds
      ``--token-embedding-type f16`` (plus one extra trailing ``"1"``) to the
      ``llama-quantize`` command.

    When a repo is given, the model is downloaded with ``huggingface-cli`` into
    ``<model_dir>/<model_name>``. Otherwise the directory must already exist, or
    the process exits with status 1.

    Conversion is skipped when ``ggml-model-<quant_type>.gguf`` already exists in
    the model directory and is non-empty.
    """
    # The returned values are not used below; the call and the two-element
    # unpacking are kept as they were.
    _os_name, _arch = system_info()

    repo_id = args.hf_repo
    target_dir = args.model_dir
    quant_type = args.quant_type
    embed_f16 = args.quant_embd

    if repo_id is not None:
        # Each supported repo gets its own sub-directory under the base directory.
        target_dir = os.path.join(target_dir, SUPPORTED_HF_MODELS[repo_id]["model_name"])
        Path(target_dir).mkdir(parents=True, exist_ok=True)
        logging.info(f"Downloading model {repo_id} from HuggingFace to {target_dir}...")
        download_cmd = ["huggingface-cli", "download", repo_id, "--local-dir", target_dir]
        run_command(download_cmd, log_step="download_model")
    elif os.path.exists(target_dir):
        logging.info(f"Loading model from directory {target_dir}.")
    else:
        logging.error(f"Model directory {target_dir} does not exist.")
        sys.exit(1)

    gguf_name = "ggml-model-" + quant_type + ".gguf"
    gguf_path = os.path.join(target_dir, gguf_name)

    # A zero-byte file counts as "not converted yet".
    already_converted = os.path.exists(gguf_path) and os.path.getsize(gguf_path) != 0
    if already_converted:
        logging.info(f"GGUF model already exists at {gguf_path}")
        return

    logging.info("Converting HF model to GGUF format...")
    convert_prefix = [sys.executable, "utils/convert-hf-to-gguf-bitnet.py", target_dir, "--outtype"]

    if quant_type.startswith("tl"):
        # NOTE(review): --quant-embd is passed unconditionally on this path,
        # independent of args.quant_embd -- confirm this is intended.
        run_command(convert_prefix + [quant_type, "--quant-embd"], log_step="convert_to_tl")
    else:
        # i2s: first convert to an f32 GGUF, then quantize that file to I2_S.
        run_command(convert_prefix + ["f32"], log_step="convert_to_f32_gguf")
        f32_model = os.path.join(target_dir, "ggml-model-f32.gguf")
        i2s_model = os.path.join(target_dir, "ggml-model-i2_s.gguf")

        # The quantize binary sits under build/bin/Release on Windows.
        if platform.system() == "Windows":
            quantize_bin = "./build/bin/Release/llama-quantize"
        else:
            quantize_bin = "./build/bin/llama-quantize"

        if embed_f16:
            quantize_cmd = [quantize_bin, "--token-embedding-type", "f16", f32_model, i2s_model, "I2_S", "1", "1"]
        else:
            quantize_cmd = [quantize_bin, f32_model, i2s_model, "I2_S", "1"]
        run_command(quantize_cmd, log_step="quantize_to_i2s")

    logging.info(f"GGUF model saved at {gguf_path}")
151
152	def setup_gguf():
153	# Install the pip package

Callers 1

mainFunction · 0.85

Calls 2

system_infoFunction · 0.85

run_commandFunction · 0.70

Tested by

no test coverage detected