MCPcopy
hub / github.com/InternLM/lmdeploy / build_model

Method build_model

lmdeploy/vl/model/xcomposer2.py:137–182  ·  view source on GitHub ↗

Build the vision part of a VLM model when backend is turbomind, or load the whole VLM model when `self.with_llm==True`

(self, trust_remote_code: bool = False)

Source from the content-addressed store, hash-verified

135 self.preprocess_func = self._preprocess_7b
136
def build_model(self, trust_remote_code: bool = False):
    """Build the vision part of a VLM model when backend is turbomind, or
    load the whole VLM model when `self.with_llm==True`.

    The model skeleton is instantiated from ``self.hf_config`` inside
    ``init_empty_weights()`` / ``init_empty_vit(self.model_path)``, then the
    checkpoint found at ``self.model_path`` is loaded in half precision.

    * ``self.with_llm`` false: ``model.model`` and ``model.output`` are
      deleted (presumably the language-model parts -- confirm against the
      remote model code) and the remaining modules are dispatched according
      to a device map inferred by accelerate from ``self.max_memory``.
    * ``self.with_llm`` true: the whole model is loaded onto the CPU.

    Args:
        trust_remote_code (bool): forwarded to
            ``AutoModelForCausalLM.from_config``.

    The resulting model is stored in ``self.vl_model`` and, switched to
    eval mode, in ``self.model``.
    """
    from accelerate import init_empty_weights, load_checkpoint_and_dispatch
    from accelerate.utils import get_balanced_memory, infer_auto_device_map

    no_split_module_classes = ['CLIPEncoderLayer']

    with init_empty_weights(), warnings.catch_warnings(), \
            init_empty_vit(self.model_path):
        warnings.simplefilter('ignore')
        config = self.hf_config
        model = AutoModelForCausalLM.from_config(config, trust_remote_code=trust_remote_code)
        model.vit.load_model()
        model.vit.resize_pos()
        vision_model = model.vit.vision_tower.vision_model
        if hasattr(config, 'img_size'):
            vision_model.embeddings.image_size = config.img_size
        vision_model.post_layernorm.to_empty(device='cpu').half()
        self.vl_model = model
        if not self.with_llm:
            del model.model
            del model.output

    if self.with_llm:
        # The whole model stays on the CPU, so no device map has to be
        # inferred and no cross-device hook may be installed below.
        device_map = {'': 'cpu'}
    else:
        max_memory = get_balanced_memory(model,
                                         max_memory=self.max_memory,
                                         dtype=torch.half,
                                         no_split_module_classes=no_split_module_classes)
        device_map = infer_auto_device_map(model,
                                           no_split_module_classes=no_split_module_classes,
                                           max_memory=max_memory,
                                           dtype=torch.half)
        # make all tensor on same device for postprocess
        if 'plora_glb_GN' in device_map:
            device_map['plora_sub_GN'] = device_map['plora_glb_GN']

    with disable_logging():
        load_checkpoint_and_dispatch(model=model,
                                     checkpoint=self.model_path,
                                     device_map=device_map,
                                     no_split_module_classes=no_split_module_classes,
                                     dtype=torch.half)

    # Only hook when the model was really dispatched with a map that places
    # `plora_glb_GN`; the hook moves the last encoder layer's output onto
    # that device. Using the map that was actually dispatched keeps the hook
    # from sending tensors to a device the weights were never loaded on.
    if 'plora_glb_GN' in device_map:
        target_device = device_map['plora_glb_GN']
        add_device_hook(model.vit.vision_tower.vision_model.encoder.layers[-1], target_device,
                        lambda x: (x[0].to(device=target_device), ))

    self.model = model.eval()
183
184 def _preprocess_2d5(self, image: Image, params: dict) -> dict:
185 """Image preprocessing for internlm-xcomposer2d5-7b."""

Callers

nothing calls this directly

Calls 4

disable_loggingFunction · 0.90
add_device_hookFunction · 0.90
init_empty_vitFunction · 0.70
from_configMethod · 0.45

Tested by

no test coverage detected