hub / github.com/llmware-ai/llmware / stream

Method stream

llmware/models.py:3792–3933 · view source on GitHub ↗

Executes stream generation inference on model.

(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
               inference_dict=None)

Source from the content-addressed store, hash-verified

3790	return output_response
3791
3792	def stream(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
3793	inference_dict=None):
3794
3795	""" Executes stream generation inference on model. """
3796
3797	from llmware.configs import ONNXConfig
3798	legacy = ONNXConfig().get_legacy_flag()
3799
3800	# first prepare the prompt
3801	t0 = time.time()
3802
3803	self.prompt = prompt
3804
3805	if add_context:
3806	self.add_context = add_context
3807
3808	if add_prompt_engineering:
3809	self.add_prompt_engineering = add_prompt_engineering
3810
3811	# add defaults if add_prompt_engineering not set
3812	if not self.add_prompt_engineering:
3813
3814	if self.add_context:
3815	self.add_prompt_engineering = "default_with_context"
3816	else:
3817	self.add_prompt_engineering = "default_no_context"
3818
3819	# end - defaults update
3820
3821	# show warning if function calling model
3822	if self.fc_supported:
3823	logger.warning("ONNXGenerativeModel - this is a function calling model - "
3824	"using .inference may lead to unexpected "
3825	"results. Recommended to use the .function_call method to "
3826	"ensure correct prompt template packaging.")
3827
3828	if inference_dict:
3829
3830	if "temperature" in inference_dict:
3831	self.temperature = inference_dict["temperature"]
3832
3833	if "max_tokens" in inference_dict:
3834	self.target_requested_output_tokens = inference_dict["max_tokens"]
3835
3836	self.preview()
3837
3838	# START - route to api endpoint
3839	if self.api_endpoint:
3840	return self.inference_over_api_endpoint(self.prompt, context=self.add_context,
3841	inference_dict=inference_dict)
3842	# END - route to api endpoint
3843
3844	text_prompt = self.prompt
3845
3846	if self.add_prompt_engineering:
3847	prompt_enriched = self.prompt_engineer(self.prompt, self.add_context, inference_dict=inference_dict)
3848	prompt_final = prompt_enriched
3849

Callers

nothing calls this directly

Calls 10

inference_over_api_endpoint · Method · 0.95

prompt_engineer · Method · 0.95

register_top_logits · Method · 0.95

ONNXConfig · Class · 0.90

LLMWareException · Class · 0.90

get_legacy_flag · Method · 0.80

preview · Method · 0.80

encode · Method · 0.80

decode · Method · 0.80

register · Method · 0.80

Tested by

no test coverage detected