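r""" Args: labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.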
(
self,
input_ids: torch.LongTensor = None,
pixel_values: Union[torch.FloatTensor, List[torch.FloatTensor], List[List[torch.FloatTensor]]] = None,
image_sizes: Union[torch.LongTensor, List[torch.LongTensor], List[List[torch.LongTensor]]] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
vision_feature_layer: Optional[int] = None,
vision_feature_select_strategy: Optional[str] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
)
| 583 | @add_start_docstrings_to_model_forward(MAGMA_INPUTS_DOCSTRING) |
| 584 | @replace_return_docstrings(output_type=MagmaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) |
| 585 | def forward( |
| 586 | self, |
| 587 | input_ids: torch.LongTensor = None, |
| 588 | pixel_values: Union[torch.FloatTensor, List[torch.FloatTensor], List[List[torch.FloatTensor]]] = None, |
| 589 | image_sizes: Union[torch.LongTensor, List[torch.LongTensor], List[List[torch.LongTensor]]] = None, |
| 590 | attention_mask: Optional[torch.Tensor] = None, |
| 591 | position_ids: Optional[torch.LongTensor] = None, |
| 592 | past_key_values: Optional[List[torch.FloatTensor]] = None, |
| 593 | inputs_embeds: Optional[torch.FloatTensor] = None, |
| 594 | vision_feature_layer: Optional[int] = None, |
| 595 | vision_feature_select_strategy: Optional[str] = None, |
| 596 | labels: Optional[torch.LongTensor] = None, |
| 597 | use_cache: Optional[bool] = None, |
| 598 | output_attentions: Optional[bool] = None, |
| 599 | output_hidden_states: Optional[bool] = None, |
| 600 | return_dict: Optional[bool] = None, |
| 601 | ) -> Union[Tuple, MagmaCausalLMOutputWithPast]: |
| 602 | r""" |
| 603 | Args: |
| 604 | labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): |
| 605 | Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., |
| 606 | config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored |
| 607 | (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. |
| 608 | |
| 609 | Returns: |
| 610 | |
| 611 | Example: |
| 612 | |
| 613 | ```python |
| 614 | >>> from PIL import Image |
| 615 | >>> import requests |
| 616 | >>> from transformers import AutoProcessor, MagmaForConditionalGeneration |
| 617 | |
| 618 | >>> model = MagmaForConditionalGeneration.from_pretrained("microsoft/magma-8b-hf") |
| 619 | >>> processor = AutoProcessor.from_pretrained("microsoft/magma-8b-hf") |
| 620 | |
| 621 | >>> prompt = "[INST] <image>\nWhat is shown in this image? [/INST]" |
| 622 | >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg" |
| 623 | >>> image = Image.open(requests.get(url, stream=True).raw) |
| 624 | |
| 625 | >>> inputs = processor(text=prompt, images=image, return_tensors="pt") |
| 626 | |
| 627 | >>> # Generate |
| 628 | >>> generate_ids = model.generate(**inputs, max_length=30) |
| 629 | >>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] |
| 630 | "[INST] \nWhat is shown in this image? [/INST] The image appears to be a radar chart, which is a type of multi-dimensional plot (...)" |
| 631 | ```""" |
| 632 | output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions |
| 633 | output_hidden_states = ( |
| 634 | output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states |
| 635 | ) |
| 636 | return_dict = return_dict if return_dict is not None else self.config.use_return_dict |
| 637 | vision_feature_layer = ( |
| 638 | vision_feature_layer if vision_feature_layer is not None else self.config.vision_config['vision_feature_layer'] |
| 639 | ) |
| 640 | |
| 641 | use_cache = use_cache if use_cache is not None else self.config.use_cache |
| 642 |
Nothing calls `forward` directly.
No test coverage was detected for `forward`.