MCPcopy
hub / github.com/ttengwang/Caption-Anything / parse_dense_caption

Method parse_dense_caption

caption_anything/model.py:219–268  ·  view source on GitHub ↗
(self, image, topN=10, reference_caption=[], verbose=False)

Source from the content-addressed store, hash-verified

217 return out_list
218
def parse_dense_caption(self, image, topN=10, reference_caption=None, verbose=False):
    """Caption every region of ``image`` and return the kept regions as one string.

    Runs ``self.inference`` with the ``'everything'`` prompt in dense-caption
    mode, drops low-scoring regions, and renders each survivor as
    ``(<caption>: X:<cx>, Y:<cy>, Width:<w>, Height:<h>)``, where ``cx``/``cy``
    are the centre of the region's ``bbox`` (read as ``x, y, w, h``).

    Args:
        image: Image handed through to ``self.inference`` unchanged. In
            verbose mode it is also passed to ``np.array`` and ``load_image``.
        topN: Forwarded to ``self.inference`` as ``args['topN']``.
        reference_caption: Forwarded as ``args['reference_caption']``.
            ``None`` (the default) means a fresh empty list.
        verbose: Forwarded to ``self.inference``; when true, a mask overlay
            and a bounding-box visualisation are also written under
            ``result/``.

    Returns:
        The formatted regions joined with ``','``; an empty string when no
        region passes the filters.
    """
    # A fresh list per call: a literal ``[]`` default is a single object shared
    # by every call, so anything downstream that mutated it would leak state.
    if reference_caption is None:
        reference_caption = []

    prompt = {'prompt_type': ['everything']}
    densecap_args = {
        'return_ppl': True,
        'clip_filter': True,
        'reference_caption': reference_caption,
        # Alternative prompt: 'Question: what does the image show? Answer:'
        'text_prompt': "",
        # Alternative crop mode: 'wo_bg'
        'seg_crop_mode': 'w_bg',
        'disable_regular_box': False,
        'topN': topN,
        'min_ppl_score': -1.8,
        'min_clip_score': 0.30,
        'min_mask_area': 2500,
    }

    dense_captions = self.inference(image, prompt,
                                    controls=None,
                                    disable_gpt=True,
                                    verbose=verbose,
                                    is_densecap=True,
                                    args=densecap_args)
    print('Process Dense Captioning: \n', dense_captions)

    min_ppl_score = densecap_args['min_ppl_score']
    min_clip_score = densecap_args['min_clip_score']
    # Keep a region only if its ppl_score, normalised by (word count + 1),
    # and its clip_score both reach their thresholds. The comparisons stay
    # in ``>=`` form so NaN scores are still rejected.
    dense_captions = [
        cap for cap in dense_captions
        if cap['ppl_score'] / (1 + len(cap['generated_captions']['raw_caption'].split())) >= min_ppl_score
        and cap['clip_score'] >= min_clip_score
    ]

    dense_cap_prompt = []
    for cap in dense_captions:
        x, y, w, h = cap['bbox']
        # The prompt reports the box centre, not its top-left corner.
        cx, cy = x + w / 2, y + h / 2
        dense_cap_prompt.append(
            "({}: X:{:.0f}, Y:{:.0f}, Width:{:.0f}, Height:{:.0f})".format(
                cap['generated_captions']['raw_caption'], cx, cy, w, h))

    if verbose:
        # Local import: the module's import list is not visible from this block.
        import os

        # Both debug images go under result/; create it so the saves cannot
        # fail on a fresh checkout.
        os.makedirs('result', exist_ok=True)

        all_masks = [np.array(item['mask'].convert('P')) for item in dense_captions]
        new_image = mask_painter_foreground_all(np.array(image), all_masks, background_alpha=0.4)
        save_path = 'result/dense_caption_mask.png'
        Image.fromarray(new_image).save(save_path)
        print(f'Dense captioning mask saved in {save_path}')

        # Timestamped name so successive runs do not overwrite each other.
        vis_path = 'result/dense_caption_vis_{}.png'.format(time.time())
        dense_cap_painter_input = [
            {'bbox': xywh_to_x1y1x2y2(cap['bbox']),
             'caption': cap['generated_captions']['raw_caption']}
            for cap in dense_captions
        ]
        draw_bbox(load_image(image, return_type='numpy'), vis_path, dense_cap_painter_input, show_caption=True)
        print(f'Dense Captioning visualization saved in {vis_path}')

    return ','.join(dense_cap_prompt)
269
270 def parse_ocr(self, image, thres=0.2):
271 width, height = get_image_shape(image)

Callers 1

Calls 6

inferenceMethod · 0.95
get_image_shapeFunction · 0.90
xywh_to_x1y1x2y2Function · 0.90
draw_bboxFunction · 0.90
load_imageFunction · 0.90

Tested by

no test coverage detected