Executes generation inference on the model.
(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
inference_dict=None)
| 4640 | return ovg.StreamingStatus.RUNNING |
| 4641 | |
| 4642 | def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None, |
| 4643 | inference_dict=None): |
| 4644 | |
| 4645 | """ Executes generation inference on model. """ |
| 4646 | |
| 4647 | # first prepare the prompt |
| 4648 | self.prompt = prompt |
| 4649 | |
| 4650 | if add_context: |
| 4651 | self.add_context = add_context |
| 4652 | |
| 4653 | self.context = self.add_context |
| 4654 | |
| 4655 | if add_prompt_engineering: |
| 4656 | self.add_prompt_engineering = add_prompt_engineering |
| 4657 | |
| 4658 | # add defaults if add_prompt_engineering not set |
| 4659 | if not self.add_prompt_engineering: |
| 4660 | |
| 4661 | if self.add_context: |
| 4662 | self.add_prompt_engineering = "default_with_context" |
| 4663 | else: |
| 4664 | self.add_prompt_engineering = "default_no_context" |
| 4665 | |
| 4666 | # end - defaults update |
| 4667 | |
| 4668 | # show warning if function calling model |
| 4669 | if self.fc_supported: |
| 4670 | logger.warning("OVGenerativeModel - this is a function calling model - using .inference may lead " |
| 4671 | "to unexpected results. Recommended to use the .function_call method to ensure " |
| 4672 | "correct prompt template packaging.") |
| 4673 | |
| 4674 | if inference_dict: |
| 4675 | |
| 4676 | if "temperature" in inference_dict: |
| 4677 | self.temperature = inference_dict["temperature"] |
| 4678 | |
| 4679 | if "max_tokens" in inference_dict: |
| 4680 | self.target_requested_output_tokens = inference_dict["max_tokens"] |
| 4681 | |
| 4682 | self.preview() |
| 4683 | |
| 4684 | # START - route to api endpoint |
| 4685 | if self.api_endpoint: |
| 4686 | return self.inference_over_api_endpoint(self.prompt, context=self.add_context, |
| 4687 | inference_dict=inference_dict) |
| 4688 | # END - route to api endpoint |
| 4689 | |
| 4690 | text_prompt = self.prompt |
| 4691 | |
| 4692 | if self.add_prompt_engineering: |
| 4693 | prompt_enriched = self.prompt_engineer(self.prompt, self.add_context, inference_dict=inference_dict) |
| 4694 | prompt_final = prompt_enriched |
| 4695 | |
| 4696 | # text_prompt = prompt_final + "\n" |
| 4697 | |
| 4698 | # most models perform better with no trailing space or line-break at the end of prompt |
| 4699 | # -- in most cases, the trailing space will be "" |
Nothing calls this method directly.
No test coverage detected.