Executes stream generation inference on model. NOTE: operates differently than other stream methods in LLMWare - the method is not a generator, but rather the streaming update is provided through passing a streamer function to the OpenVINO backend - which will be called at each step of the generation cycle.
(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
inference_dict=None, streamer=None)
| 5007 | return output |
| 5008 | |
| 5009 | def stream(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None, |
| 5010 | inference_dict=None, streamer=None): |
| 5011 | |
| 5012 | """ Executes stream generation inference on model. |
| 5013 | |
| 5014 | NOTE: operates differently than other stream methods in LLMWare - |
| 5015 | the method is not a generator, but rather the streaming update is |
| 5016 | provided through passing a streamer function to the OpenVINO |
| 5017 | backend - which will be called at each step of the generation |
| 5018 | cycle. |
| 5019 | |
| 5020 | Sample call: |
| 5021 | |
| 5022 | # will automatically use default streamer to print to console |
| 5023 | response = model.stream('Where is Paris?') |
| 5024 | |
| 5025 | # pass a custom streaming function |
| 5026 | response = model.stream('Where is Rome?', streamer=my_streamer) |
| 5027 | |
| 5028 | Streamer function example: .ov_default_streamer in this model class |
| 5029 | |
| 5030 | """ |
| 5031 | |
| 5032 | # first prepare the prompt |
| 5033 | self.prompt = prompt |
| 5034 | |
| 5035 | if add_context: |
| 5036 | self.add_context = add_context |
| 5037 | |
| 5038 | self.context = self.add_context |
| 5039 | |
| 5040 | if add_prompt_engineering: |
| 5041 | self.add_prompt_engineering = add_prompt_engineering |
| 5042 | |
| 5043 | # add defaults if add_prompt_engineering not set |
| 5044 | if not self.add_prompt_engineering: |
| 5045 | |
| 5046 | if self.add_context: |
| 5047 | self.add_prompt_engineering = "default_with_context" |
| 5048 | else: |
| 5049 | self.add_prompt_engineering = "default_no_context" |
| 5050 | |
| 5051 | # end - defaults update |
| 5052 | |
| 5053 | # show warning if function calling model |
| 5054 | if self.fc_supported: |
| 5055 | logger.warning("OVGenerativeModel - this is a function calling model - using .inference may lead " |
| 5056 | "to unexpected results. Recommended to use the .function_call method to ensure " |
| 5057 | "correct prompt template packaging.") |
| 5058 | |
| 5059 | if inference_dict: |
| 5060 | |
| 5061 | if "temperature" in inference_dict: |
| 5062 | self.temperature = inference_dict["temperature"] |
| 5063 | |
| 5064 | if "max_tokens" in inference_dict: |
| 5065 | self.target_requested_output_tokens = inference_dict["max_tokens"] |
| 5066 |
nothing calls this directly
no test coverage detected