MCPcopy
hub / github.com/llmware-ai/llmware / inference

Method inference

llmware/models.py:4642–4764  ·  view source on GitHub ↗

Executes generation inference on model.

(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
                  inference_dict=None)

Source from the content-addressed store, hash-verified

4640 return ovg.StreamingStatus.RUNNING
4641
4642 def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None,
4643 inference_dict=None):
4644
4645 """ Executes generation inference on model. """
4646
4647 # first prepare the prompt
4648 self.prompt = prompt
4649
4650 if add_context:
4651 self.add_context = add_context
4652
4653 self.context = self.add_context
4654
4655 if add_prompt_engineering:
4656 self.add_prompt_engineering = add_prompt_engineering
4657
4658 # add defaults if add_prompt_engineering not set
4659 if not self.add_prompt_engineering:
4660
4661 if self.add_context:
4662 self.add_prompt_engineering = "default_with_context"
4663 else:
4664 self.add_prompt_engineering = "default_no_context"
4665
4666 # end - defaults update
4667
4668 # show warning if function calling model
4669 if self.fc_supported:
4670 logger.warning("OVGenerativeModel - this is a function calling model - using .inference may lead "
4671 "to unexpected results. Recommended to use the .function_call method to ensure "
4672 "correct prompt template packaging.")
4673
4674 if inference_dict:
4675
4676 if "temperature" in inference_dict:
4677 self.temperature = inference_dict["temperature"]
4678
4679 if "max_tokens" in inference_dict:
4680 self.target_requested_output_tokens = inference_dict["max_tokens"]
4681
4682 self.preview()
4683
4684 # START - route to api endpoint
4685 if self.api_endpoint:
4686 return self.inference_over_api_endpoint(self.prompt, context=self.add_context,
4687 inference_dict=inference_dict)
4688 # END - route to api endpoint
4689
4690 text_prompt = self.prompt
4691
4692 if self.add_prompt_engineering:
4693 prompt_enriched = self.prompt_engineer(self.prompt, self.add_context, inference_dict=inference_dict)
4694 prompt_final = prompt_enriched
4695
4696 # text_prompt = prompt_final + "\n"
4697
4698 # most models perform better with no trailing space or line-break at the end of prompt
4699 # -- in most cases, the trailing space will be ""

Callers

nothing calls this directly

Calls 6

prompt_engineer · Method · 0.95
ov_token_counter · Method · 0.95
_generate_ov_genai · Method · 0.95
preview · Method · 0.80
register · Method · 0.80

Tested by

no test coverage detected