MCPcopy
hub / github.com/llmware-ai/llmware / stream

Method stream

llmware/models.py:3792–3933  ·  view source on GitHub ↗

Executes streaming generation inference on the model.

(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
               inference_dict=None)

Source from the content-addressed store, hash-verified

3790 return output_response
3791
3792 def stream(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
3793 inference_dict=None):
3794
3795 """ Executes stream generation inference on model. """
3796
3797 from llmware.configs import ONNXConfig
3798 legacy = ONNXConfig().get_legacy_flag()
3799
3800 # first prepare the prompt
3801 t0 = time.time()
3802
3803 self.prompt = prompt
3804
3805 if add_context:
3806 self.add_context = add_context
3807
3808 if add_prompt_engineering:
3809 self.add_prompt_engineering = add_prompt_engineering
3810
3811 # add defaults if add_prompt_engineering not set
3812 if not self.add_prompt_engineering:
3813
3814 if self.add_context:
3815 self.add_prompt_engineering = "default_with_context"
3816 else:
3817 self.add_prompt_engineering = "default_no_context"
3818
3819 # end - defaults update
3820
3821 # show warning if function calling model
3822 if self.fc_supported:
3823 logger.warning("ONNXGenerativeModel - this is a function calling model - "
3824 "using .inference may lead to unexpected "
3825 "results. Recommended to use the .function_call method to "
3826 "ensure correct prompt template packaging.")
3827
3828 if inference_dict:
3829
3830 if "temperature" in inference_dict:
3831 self.temperature = inference_dict["temperature"]
3832
3833 if "max_tokens" in inference_dict:
3834 self.target_requested_output_tokens = inference_dict["max_tokens"]
3835
3836 self.preview()
3837
3838 # START - route to api endpoint
3839 if self.api_endpoint:
3840 return self.inference_over_api_endpoint(self.prompt, context=self.add_context,
3841 inference_dict=inference_dict)
3842 # END - route to api endpoint
3843
3844 text_prompt = self.prompt
3845
3846 if self.add_prompt_engineering:
3847 prompt_enriched = self.prompt_engineer(self.prompt, self.add_context, inference_dict=inference_dict)
3848 prompt_final = prompt_enriched
3849

Callers

nothing calls this directly

Calls 10

prompt_engineer · Method · 0.95
register_top_logits · Method · 0.95
ONNXConfig · Class · 0.90
LLMWareException · Class · 0.90
get_legacy_flag · Method · 0.80
preview · Method · 0.80
encode · Method · 0.80
decode · Method · 0.80
register · Method · 0.80

Tested by

no test coverage detected