hub / github.com/InternLM/lmdeploy / build_model

Method build_model

lmdeploy/vl/model/xcomposer2.py:137–182 · view source on GitHub ↗

Build the vision part of a VLM model when backend is turbomind, or load the whole VLM model when `self.with_llm==True`

(self, trust_remote_code: bool = False)

Source from the content-addressed store, hash-verified

135	self.preprocess_func = self._preprocess_7b
136
def build_model(self, trust_remote_code: bool = False):
    """Build the vision part of a VLM model when backend is turbomind, or
    load the whole VLM model when `self.with_llm==True`.

    The model skeleton is created from ``self.hf_config`` under
    ``init_empty_weights``; the weights are then loaded from
    ``self.model_path`` with ``load_checkpoint_and_dispatch``. The
    skeleton is stored in ``self.vl_model`` and the loaded model, switched
    to eval mode, in ``self.model``.

    Args:
        trust_remote_code (bool): forwarded unchanged to
            ``AutoModelForCausalLM.from_config``.
    """
    from accelerate import init_empty_weights, load_checkpoint_and_dispatch
    from accelerate.utils import get_balanced_memory, infer_auto_device_map

    hf_cfg = self.hf_config
    # Module classes that must not be split across devices.
    no_split = ['CLIPEncoderLayer']

    with init_empty_weights(), warnings.catch_warnings(), init_empty_vit(self.model_path):
        warnings.simplefilter('ignore')
        model = AutoModelForCausalLM.from_config(hf_cfg, trust_remote_code=trust_remote_code)
        model.vit.load_model()
        model.vit.resize_pos()
        # Resolved only after load_model()/resize_pos() have run.
        vision_model = model.vit.vision_tower.vision_model
        if hasattr(hf_cfg, 'img_size'):
            vision_model.embeddings.image_size = hf_cfg.img_size
        vision_model.post_layernorm.to_empty(device='cpu').half()
        self.vl_model = model
        if not self.with_llm:
            # Only the vision part is wanted: drop `model.model` and
            # `model.output` (presumably the language model and its
            # output head -- confirm against the remote model code).
            del model.model
            del model.output

    memory_budget = get_balanced_memory(
        model,
        max_memory=self.max_memory,
        dtype=torch.half,
        no_split_module_classes=no_split,
    )
    device_map = infer_auto_device_map(
        model,
        no_split_module_classes=no_split,
        max_memory=memory_budget,
        dtype=torch.half,
    )
    # Keep both plora GN tensors on one device for postprocessing.
    if 'plora_glb_GN' in device_map:
        device_map['plora_sub_GN'] = device_map['plora_glb_GN']

    # With the LLM included, the whole checkpoint is loaded onto CPU
    # instead of following the inferred device map.
    dispatch_map = {'': 'cpu'} if self.with_llm else device_map
    with disable_logging():
        load_checkpoint_and_dispatch(
            model=model,
            checkpoint=self.model_path,
            device_map=dispatch_map,
            no_split_module_classes=no_split,
            dtype=torch.half,
        )

    if 'plora_glb_GN' in device_map:

        def _move_first_to_glb_device(outputs):
            # Looks the device up at call time; returns a 1-tuple holding
            # the first output moved to the `plora_glb_GN` device.
            return (outputs[0].to(device=device_map['plora_glb_GN']), )

        last_encoder_layer = model.vit.vision_tower.vision_model.encoder.layers[-1]
        add_device_hook(last_encoder_layer, device_map['plora_glb_GN'], _move_first_to_glb_device)

    self.model = model.eval()
183
184	def _preprocess_2d5(self, image: Image, params: dict) -> dict:
185	"""Image preprocessing for internlm-xcomposer2d5-7b."""

Callers

nothing calls this directly

Calls 4

disable_loggingFunction · 0.90

add_device_hookFunction · 0.90

init_empty_vitFunction · 0.70

from_configMethod · 0.45

Tested by

no test coverage detected