Executes stream generation inference on model. NOTE: operates differently than other stream methods in LLMWare - the method is not a generator, but rather the streaming update is provided through passing a streamer function to the OpenVINO backend - which will be called at each step of the generation cycle.
(self, prompt, image_path, add_context=None, add_prompt_engineering=None, api_key=None,
inference_dict=None, streamer=None,no_stream=False)
| 5542 | streamer=None, no_stream=True) |
| 5543 | |
| 5544 | def stream(self, prompt, image_path, add_context=None, add_prompt_engineering=None, api_key=None, |
| 5545 | inference_dict=None, streamer=None,no_stream=False): |
| 5546 | |
| 5547 | """ Executes stream generation inference on model. |
| 5548 | |
| 5549 | NOTE: operates differently than other stream methods in LLMWare - |
| 5550 | the method is not a generator, but rather the streaming update is |
| 5551 | provided through passing a streamer function to the OpenVINO |
| 5552 | backend - which will be called at each step of the generation |
| 5553 | cycle. |
| 5554 | |
| 5555 | Sample call: |
| 5556 | |
| 5557 | # will automatically use default streamer to print to console |
| 5558 | response = model.stream('Describe this image', 'C:\\Users\\...') |
| 5559 | |
| 5560 | # pass a custom streaming function |
| 5561 | response = model.stream('Describe this image', 'C:\\Users\\...', streamer=my_streamer) |
| 5562 | |
| 5563 | Streamer function example: .ov_default_streamer in this model class |
| 5564 | |
| 5565 | """ |
| 5566 | |
| 5567 | # first prepare the prompt |
| 5568 | self.prompt = prompt |
| 5569 | |
| 5570 | if inference_dict: |
| 5571 | |
| 5572 | if "temperature" in inference_dict: |
| 5573 | self.temperature = inference_dict["temperature"] |
| 5574 | |
| 5575 | if "max_tokens" in inference_dict: |
| 5576 | self.target_requested_output_tokens = inference_dict["max_tokens"] |
| 5577 | |
| 5578 | self.preview() |
| 5579 | |
| 5580 | text_prompt = self.prompt |
| 5581 | |
| 5582 | # counts the input tokens |
| 5583 | if self.get_token_counts: |
| 5584 | self.input_token_count = self.ov_token_counter(text_prompt) |
| 5585 | else: |
| 5586 | self.input_token_count = 0 |
| 5587 | |
| 5588 | time_start = time.time() |
| 5589 | |
| 5590 | # prepares the image as tensor |
| 5591 | from PIL import Image |
| 5592 | pic = Image.open(image_path).convert("RGB") |
| 5593 | image_data = np.array(pic)[None] |
| 5594 | images = [openvino.Tensor(image_data)] |
| 5595 | |
| 5596 | # main call to inner generate function |
| 5597 | if not streamer and not no_stream: |
| 5598 | streamer = self.ov_default_streamer |
| 5599 | |
| 5600 | output = self._generate_ov_genai(text_prompt, |
| 5601 | image=images, |
no test coverage detected