Class RAMCaptioner

diffbir/utils/caption.py:152–171 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

150
151
class RAMCaptioner(Captioner):
    """Captioner that describes an image as a comma-separated list of tags.

    Tags are produced by the RAM++ model built with ``ram_plus`` (Swin-L
    backbone, 384x384 input) and queried through ``inference``.
    """

    def __init__(self, device: torch.device) -> None:
        """Build the preprocessing transform and the tagging model.

        Args:
            device: Device the model is moved to. It is also forwarded to
                the base-class initializer.
        """
        super().__init__(device)
        # The same size is used for the preprocessing transform and the model.
        image_size = 384
        transform = get_transform(image_size=image_size)
        pretrained = "https://huggingface.co/xinyu1205/recognize-anything-plus-model/resolve/main/ram_plus_swin_large_14m.pth"
        model = ram_plus(pretrained=pretrained, image_size=image_size, vit="swin_l")
        # Inference only: switch the module to evaluation mode.
        model.eval()
        model = model.to(device)

        self.transform = transform
        self.model = model

    def __call__(self, image: Image.Image) -> str:
        """Return the tags recognized in ``image`` joined by ``", "``.

        Args:
            image: Image to tag.

        Returns:
            The first element of the ``inference`` result with its
            ``" | "`` separators replaced by ``", "``.
        """
        # Add a leading batch dimension and move the tensor to the model's
        # device. NOTE(review): ``self.device`` is presumably set by
        # ``Captioner.__init__`` -- confirm against the base class.
        image = self.transform(image).unsqueeze(0).to(self.device)
        res = inference(image, self.model)
        # res[0]: armchair | blanket | lamp | ...
        # res[1]: the same tags in Chinese (unused here)
        return res[0].replace(" | ", ", ")

run_gradio.pyFile · 0.90

load_captionerMethod · 0.85

no outgoing calls

no test coverage detected