Method build_model

lmdeploy/vl/model/molmo.py:26–50 · view source on GitHub ↗

Build the vision part of a VLM model when backend is turbomind, or load the whole VLM model when `self.with_llm==True`

(self, trust_remote_code: bool = False)

Source from the content-addressed store, hash-verified

24	device_map='auto')
25
26	def build_model(self, trust_remote_code: bool = False):
27	"""Build the vision part of a VLM model when backend is turbomind, or
28	load the whole VLM model when `self.with_llm==True`.
	
	The model is first instantiated from `self.hf_config` under
	`init_empty_weights()`, then its weights are loaded from
	`self.model_path` in half precision by `load_checkpoint_and_dispatch`.
	
	Attributes set on `self`: `vl_model`, `token_embedding` and `model`
	(the loaded model switched to eval mode).
	
	Args:
	    trust_remote_code: Forwarded unchanged to
	        `AutoModelForCausalLM.from_config`. Defaults to False.
	"""
	# Function-scope import: accelerate is only imported when this method runs.
29	from accelerate import init_empty_weights, load_checkpoint_and_dispatch
	# Create the module structure without allocating real weight storage;
	# the actual weights are filled in by load_checkpoint_and_dispatch below.
30	with init_empty_weights():
31	model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=trust_remote_code)
32
33	self.vl_model = model
34	if not self.with_llm:
35	# Remove nn modules other than embedding from the LLM model
36	for key in ['emb_drop', 'ln_f', 'blocks', 'ff_out']:
37	del model.model.transformer[key]
	# NOTE(review): indentation is not visible in this listing -- confirm
	# whether the assignment below belongs to the `if not self.with_llm`
	# branch or runs unconditionally.
38	self.token_embedding = model.model.transformer.wte
39
40	with disable_logging():
41	load_checkpoint_and_dispatch(model=model,
42	checkpoint=self.model_path,
	# Vision-only build: let accelerate choose the placement ('auto').
	# Full model (with_llm): map the root module '' to CPU, i.e. keep everything on CPU.
43	device_map='auto' if not self.with_llm else {'': 'cpu'},
44	max_memory=self.max_memory,
	# Modules of these classes are never split across devices.
45	no_split_module_classes=['ResidualAttentionBlock', 'Embedding'],
46	dtype=torch.half)
47
48	# Switch to eval mode so that training-only behaviour (e.g. dropout) is
49	# disabled, avoiding randomness in inference. This does not freeze weights.
50	self.model = model.eval()
51
52	def preprocess(self, messages: list[dict]) -> list[dict]:
53	"""Refer to the `super.preprocess() for spec."""

nothing calls this directly

disable_logging · Function · 0.90

from_config · Method · 0.45

no test coverage detected