hub / github.com/ttengwang/Caption-Anything / parse_dense_caption

Method parse_dense_caption

caption_anything/model.py:219–268 · view source on GitHub ↗

(self, image, topN=10, reference_caption=[], verbose=False)

Source from the content-addressed store, hash-verified

217	return out_list
218
def parse_dense_caption(self, image, topN=10, reference_caption=None, verbose=False):
    """Run dense captioning on ``image`` and serialize the kept regions as text.

    Calls ``self.inference`` with the ``'everything'`` prompt type and
    ``is_densecap=True``, discards results whose length-normalized
    ``ppl_score`` or whose ``clip_score`` falls below the configured minimum,
    and formats every remaining region as
    ``(caption: X:cx, Y:cy, Width:w, Height:h)``, where ``(cx, cy)`` is the
    center of the region's ``bbox`` (unpacked as ``x, y, w, h``).

    Args:
        image: Input image; forwarded unchanged to ``get_image_shape``,
            ``self.inference`` and, when ``verbose`` is set, ``load_image``.
        topN: Forwarded to inference as ``args['topN']``.
        reference_caption: Forwarded to inference as
            ``args['reference_caption']``. ``None`` (the default) is replaced
            by a fresh empty list so no list object is shared between calls.
        verbose: Forwarded to inference. When true, a mask overlay and a
            bounding-box visualization are also written under ``result/``.

    Returns:
        str: The region descriptions joined with ``','``; an empty string
        when no region passes the filters.
    """
    if reference_caption is None:
        reference_caption = []

    # NOTE(review): the returned shape is not used in this method; the call is
    # kept in case it rejects unsupported inputs early -- confirm and remove.
    get_image_shape(image)

    prompt = {'prompt_type': ['everything']}
    densecap_args = {
        'return_ppl': True,
        'clip_filter': True,
        'reference_caption': reference_caption,
        # Alternative prompt: 'Question: what does the image show? Answer:'
        'text_prompt': "",
        # Alternative crop mode: 'wo_bg'
        'seg_crop_mode': 'w_bg',
        'disable_regular_box': False,
        'topN': topN,
        'min_ppl_score': -1.8,
        'min_clip_score': 0.30,
        'min_mask_area': 2500,
    }

    dense_captions = self.inference(
        image,
        prompt,
        controls=None,
        disable_gpt=True,
        verbose=verbose,
        is_densecap=True,
        args=densecap_args,
    )
    print('Process Dense Captioning: \n', dense_captions)

    # Thresholds are read after inference, exactly where the filters run.
    min_ppl_score = densecap_args['min_ppl_score']
    min_clip_score = densecap_args['min_clip_score']

    def _passes_filters(cap):
        # ppl_score is divided by (word count + 1) before thresholding; the
        # clip_score check is only reached when the ppl check passes.
        n_words = len(cap['generated_captions']['raw_caption'].split())
        if cap['ppl_score'] / (1 + n_words) < min_ppl_score:
            return False
        return cap['clip_score'] >= min_clip_score

    dense_captions = [cap for cap in dense_captions if _passes_filters(cap)]

    dense_cap_prompt = []
    for cap in dense_captions:
        x, y, w, h = cap['bbox']
        cx, cy = x + w / 2, y + h / 2
        dense_cap_prompt.append(
            "({}: X:{:.0f}, Y:{:.0f}, Width:{:.0f}, Height:{:.0f})".format(
                cap['generated_captions']['raw_caption'], cx, cy, w, h))

    if verbose:
        import os

        # Both outputs are written under 'result/'; create it up front so the
        # first save does not fail on a fresh checkout.
        os.makedirs('result', exist_ok=True)

        # NOTE(review): np.array(image) is used here while the box
        # visualization below goes through load_image(); confirm that `image`
        # is always array-convertible on this path.
        all_masks = [np.array(item['mask'].convert('P')) for item in dense_captions]
        new_image = mask_painter_foreground_all(np.array(image), all_masks, background_alpha=0.4)
        save_path = 'result/dense_caption_mask.png'
        Image.fromarray(new_image).save(save_path)
        print(f'Dense captioning mask saved in {save_path}')

        # The timestamp keeps successive visualizations from overwriting
        # each other.
        vis_path = 'result/dense_caption_vis_{}.png'.format(time.time())
        dense_cap_painter_input = [
            {
                'bbox': xywh_to_x1y1x2y2(cap['bbox']),
                'caption': cap['generated_captions']['raw_caption'],
            }
            for cap in dense_captions
        ]
        draw_bbox(load_image(image, return_type='numpy'), vis_path, dense_cap_painter_input, show_caption=True)
        print(f'Dense Captioning visualization saved in {vis_path}')

    return ','.join(dense_cap_prompt)
269
270	def parse_ocr(self, image, thres=0.2):
271	width, height = get_image_shape(image)

Callers 1

inference_cap_everything — Method · 0.95

Calls 6

inference — Method · 0.95

get_image_shape — Function · 0.90

mask_painter_foreground_all — Function · 0.90

xywh_to_x1y1x2y2 — Function · 0.90

draw_bbox — Function · 0.90

load_image — Function · 0.90

Tested by

no test coverage detected