MCPcopy
hub / github.com/InternLM/lmdeploy / build_model

Method build_model

lmdeploy/vl/model/molmo.py:26–50  ·  view source on GitHub ↗

Build the vision part of a VLM model when backend is turbomind, or load the whole VLM model when `self.with_llm==True`

(self, trust_remote_code: bool = False)

Source from the content-addressed store, hash-verified

24 device_map='auto')
25
def build_model(self, trust_remote_code: bool = False) -> None:
    """Build the vision part of a VLM model when backend is turbomind, or
    load the whole VLM model when `self.with_llm==True`.

    The model skeleton is created from `self.hf_config` inside accelerate's
    `init_empty_weights` context, optionally pruned of its LLM submodules,
    and then populated from the checkpoint at `self.model_path`.

    Args:
        trust_remote_code: Forwarded to `AutoModelForCausalLM.from_config`.

    Side effects:
        Sets `self.vl_model`, `self.token_embedding` and `self.model`.
    """
    # Imported lazily, so accelerate is only needed when this method is called.
    from accelerate import init_empty_weights, load_checkpoint_and_dispatch
    with init_empty_weights():
        # Instantiate the architecture only; the weights are loaded further below.
        model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=trust_remote_code)

        self.vl_model = model
        if not self.with_llm:
            # Remove nn modules other than embedding from the LLM model
            for key in ['emb_drop', 'ln_f', 'blocks', 'ff_out']:
                del model.model.transformer[key]
        # NOTE(review): nesting was reconstructed from a flattened listing --
        # confirm this assignment is meant to run regardless of `self.with_llm`.
        self.token_embedding = model.model.transformer.wte

    with disable_logging():
        # Vision-only build: let accelerate place modules automatically ('auto').
        # Full VLM (`self.with_llm` is true): map every module to the CPU.
        load_checkpoint_and_dispatch(model=model,
                                     checkpoint=self.model_path,
                                     device_map='auto' if not self.with_llm else {'': 'cpu'},
                                     max_memory=self.max_memory,
                                     no_split_module_classes=['ResidualAttentionBlock', 'Embedding'],
                                     dtype=torch.half)

    # Switch to eval mode so training-only behaviour (e.g. dropout) is
    # disabled, thus avoiding randomness in inference.
    self.model = model.eval()
51
52 def preprocess(self, messages: list[dict]) -> list[dict]:
53 """Refer to the `super.preprocess() for spec."""

Callers

nothing calls this directly

Calls 2

disable_loggingFunction · 0.90
from_configMethod · 0.45

Tested by

no test coverage detected