(self, image: Image.Image)
| 121 | |
@torch.no_grad()
def __call__(self, image: Image.Image) -> str:
    """Run the model on one image and return the first decoded output.

    The image is preprocessed with ``process_images``, the stored prompt is
    tokenized with ``tokenizer_image_token``, and ``self.model.generate`` is
    invoked with the sampling settings held on the instance.

    Args:
        image: The image to feed to the model; its ``size`` attribute is
            forwarded to ``generate`` as ``image_sizes``.

    Returns:
        The first string produced by ``batch_decode``, with leading and
        trailing whitespace stripped.
    """
    batch = [image]
    sizes = [img.size for img in batch]

    # Preprocess, then move to the target device as half precision.
    pixel_values = process_images(batch, self.image_processor, self.model.config)
    pixel_values = pixel_values.to(self.device, dtype=torch.float16)

    # Tokenize the prompt and add a leading dimension of size one.
    prompt_ids = tokenizer_image_token(
        self.prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
    )
    prompt_ids = prompt_ids.unsqueeze(0).to(self.device)

    # Sampling is only switched on for a strictly positive temperature.
    sample = bool(self.temperature > 0)

    generated_ids = self.model.generate(
        prompt_ids,
        images=pixel_values,
        image_sizes=sizes,
        do_sample=sample,
        temperature=self.temperature,
        top_p=self.top_p,
        num_beams=self.num_beams,
        max_new_tokens=self.max_new_tokens,
        use_cache=True,
    )

    decoded = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0].strip()
| 150 | |
| 151 | |
| 152 | class RAMCaptioner(Captioner): |
nothing calls this directly
no test coverage detected