hub / github.com/llmware-ai/llmware / inference

Method inference

llmware/models.py:4642–4764 · view source on GitHub ↗

Executes generation inference on model.

(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
                  inference_dict=None)

Source from the content-addressed store, hash-verified

4640	return ovg.StreamingStatus.RUNNING
4641
4642	def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
4643	inference_dict=None):
4644
4645	""" Executes generation inference on model. """
4646
4647	# first prepare the prompt
4648	self.prompt = prompt
4649
4650	if add_context:
4651	self.add_context = add_context
4652
4653	self.context = self.add_context
4654
4655	if add_prompt_engineering:
4656	self.add_prompt_engineering = add_prompt_engineering
4657
4658	# add defaults if add_prompt_engineering not set
4659	if not self.add_prompt_engineering:
4660
4661	if self.add_context:
4662	self.add_prompt_engineering = "default_with_context"
4663	else:
4664	self.add_prompt_engineering = "default_no_context"
4665
4666	# end - defaults update
4667
4668	# show warning if function calling model
4669	if self.fc_supported:
4670	logger.warning("OVGenerativeModel - this is a function calling model - using .inference may lead "
4671	"to unexpected results. Recommended to use the .function_call method to ensure "
4672	"correct prompt template packaging.")
4673
4674	if inference_dict:
4675
4676	if "temperature" in inference_dict:
4677	self.temperature = inference_dict["temperature"]
4678
4679	if "max_tokens" in inference_dict:
4680	self.target_requested_output_tokens = inference_dict["max_tokens"]
4681
4682	self.preview()
4683
4684	# START - route to api endpoint
4685	if self.api_endpoint:
4686	return self.inference_over_api_endpoint(self.prompt, context=self.add_context,
4687	inference_dict=inference_dict)
4688	# END - route to api endpoint
4689
4690	text_prompt = self.prompt
4691
4692	if self.add_prompt_engineering:
4693	prompt_enriched = self.prompt_engineer(self.prompt, self.add_context, inference_dict=inference_dict)
4694	prompt_final = prompt_enriched
4695
4696	# text_prompt = prompt_final + "\n"
4697
4698	# most models perform better with no trailing space or line-break at the end of prompt
4699	# -- in most cases, the trailing space will be ""

Callers

nothing calls this directly

Calls 6

inference_over_api_endpoint — Method · 0.95

prompt_engineer — Method · 0.95

ov_token_counter — Method · 0.95

_generate_ov_genai — Method · 0.95

preview — Method · 0.80

register — Method · 0.80

Tested by

no test coverage detected