Executes streaming generation inference on the model.
(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
inference_dict=None)
| 3790 | return output_response |
| 3791 | |
| 3792 | def stream(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None, |
| 3793 | inference_dict=None): |
| 3794 | |
| 3795 | """ Executes stream generation inference on model. """ |
| 3796 | |
| 3797 | from llmware.configs import ONNXConfig |
| 3798 | legacy = ONNXConfig().get_legacy_flag() |
| 3799 | |
| 3800 | # first prepare the prompt |
| 3801 | t0 = time.time() |
| 3802 | |
| 3803 | self.prompt = prompt |
| 3804 | |
| 3805 | if add_context: |
| 3806 | self.add_context = add_context |
| 3807 | |
| 3808 | if add_prompt_engineering: |
| 3809 | self.add_prompt_engineering = add_prompt_engineering |
| 3810 | |
| 3811 | # add defaults if add_prompt_engineering not set |
| 3812 | if not self.add_prompt_engineering: |
| 3813 | |
| 3814 | if self.add_context: |
| 3815 | self.add_prompt_engineering = "default_with_context" |
| 3816 | else: |
| 3817 | self.add_prompt_engineering = "default_no_context" |
| 3818 | |
| 3819 | # end - defaults update |
| 3820 | |
| 3821 | # show warning if function calling model |
| 3822 | if self.fc_supported: |
| 3823 | logger.warning("ONNXGenerativeModel - this is a function calling model - " |
| 3824 | "using .inference may lead to unexpected " |
| 3825 | "results. Recommended to use the .function_call method to " |
| 3826 | "ensure correct prompt template packaging.") |
| 3827 | |
| 3828 | if inference_dict: |
| 3829 | |
| 3830 | if "temperature" in inference_dict: |
| 3831 | self.temperature = inference_dict["temperature"] |
| 3832 | |
| 3833 | if "max_tokens" in inference_dict: |
| 3834 | self.target_requested_output_tokens = inference_dict["max_tokens"] |
| 3835 | |
| 3836 | self.preview() |
| 3837 | |
| 3838 | # START - route to api endpoint |
| 3839 | if self.api_endpoint: |
| 3840 | return self.inference_over_api_endpoint(self.prompt, context=self.add_context, |
| 3841 | inference_dict=inference_dict) |
| 3842 | # END - route to api endpoint |
| 3843 | |
| 3844 | text_prompt = self.prompt |
| 3845 | |
| 3846 | if self.add_prompt_engineering: |
| 3847 | prompt_enriched = self.prompt_engineer(self.prompt, self.add_context, inference_dict=inference_dict) |
| 3848 | prompt_final = prompt_enriched |
| 3849 |
Nothing calls this method directly.
No test coverage was detected for this method.