MCPcopy
hub / github.com/InternLM/lmdeploy / preprocess

Method preprocess

lmdeploy/vl/model/base.py:123–238  ·  view source on GitHub ↗

Preprocess multimodal data and return a dict containing ``input_ids`` and the multimodal features. New-style models inherit this implementation; legacy models override it with the older signature ``def preprocess(self, messages)``.

(self,
                   messages: list[dict],
                   input_prompt: str | list[int],
                   mm_processor_kwargs: dict[str, Any] | None = None)

Source from the content-addressed store, hash-verified

121 raise NotImplementedError()
122
123 def preprocess(self,
124 messages: list[dict],
125 input_prompt: str | list[int],
126 mm_processor_kwargs: dict[str, Any] | None = None) -> dict[str, Any]:
127 """Preprocess multimodal data and return a dict with ``input_ids`` and
128 multimodal features.
129
130 New-style models inherit this implementation. Legacy models override with `def preprocess(self, messages)`.
131 """
132
133 mm_items = self.collect_multimodal_items(messages)
134
135 raw_images, raw_videos, video_metadatas = [], [], []
136 raw_audios = []
137 raw_time_series, sampling_rates = [], []
138 for modality, data, params in mm_items:
139 if modality == Modality.IMAGE:
140 raw_images.append(data)
141 elif modality == Modality.VIDEO:
142 raw_videos.append(data)
143 video_metadatas.append(params.get('video_metadata', None))
144 elif modality == Modality.AUDIO:
145 raw_audios.append(data[0] if isinstance(data, tuple) else data)
146 elif modality == Modality.TIME_SERIES:
147 raw_time_series.append(data)
148 sampling_rates.append(params.get('sampling_rate', None))
149 else:
150 raise ValueError(f'unsupported modality {modality}')
151
152 # get kwargs for processor
153 kwargs = {}
154 images_kwargs = {}
155 videos_kwargs = {}
156 audio_kwargs = {}
157 mm_processor_kwargs = mm_processor_kwargs or {}
158 if raw_images:
159 kwargs['images'] = raw_images
160 image_size = get_override_size(self.processor.image_processor,
161 mm_processor_kwargs.get('image'),
162 modality='image')
163 if image_size is not None:
164 images_kwargs['size'] = image_size
165 if raw_videos:
166 kwargs['videos'] = raw_videos
167 videos_kwargs['video_metadata'] = video_metadatas
168 # perform resize in hf processor, while sample frames has been done in video loader
169 videos_kwargs['do_resize'] = True
170 videos_kwargs['do_sample_frames'] = False
171 video_size = get_override_size(self.processor.video_processor,
172 mm_processor_kwargs.get('video'),
173 modality='video')
174 if video_size is not None:
175 videos_kwargs['size'] = video_size
176 if raw_audios:
177 kwargs['audio'] = raw_audios
178 audio_kwargs = dict(mm_processor_kwargs.get('audio') or {})
179 feature_extractor = getattr(self.processor, 'feature_extractor', None)
180 sampling_rate = getattr(feature_extractor, 'sampling_rate', None)

Callers

nothing calls this directly

Calls 10

get_override_size · Function · 0.90
get_expanded_input_ids · Function · 0.90
get_mm_items_offset · Function · 0.90
get_expanded_mm_items · Function · 0.90
items · Method · 0.80
append · Method · 0.45
get · Method · 0.45

Tested by

no test coverage detected