hub / github.com/llmware-ai/llmware / inference_over_api_endpoint

Method inference_over_api_endpoint

llmware/models.py:3942–4001 · view source on GitHub ↗

Called by the .inference method when an api_endpoint was passed to the model constructor. Rather than executing the inference locally, the request is sent over the API to an inference server.

(self, prompt, context=None, inference_dict=None, get_logits=False)

Source from the content-addressed store, hash-verified

3940	return True
3941
3942	def inference_over_api_endpoint(self, prompt, context=None, inference_dict=None, get_logits=False):
3943
3944	""" Called by .inference method when there is an api_endpoint passed in the model constructor. Rather
3945	than execute the inference locally, it will be sent over API to inference server. """
3946
3947	import ast
3948	import requests
3949
3950	self.prompt = prompt
3951	self.context = context
3952
3953	self.preview()
3954
3955	url = self.api_endpoint + "{}".format("/")
3956	output_raw = requests.post(url, data={"model_name": self.model_name,
3957	"question": self.prompt,
3958	"context": self.context,
3959	"api_key": self.api_key,
3960	"max_output": self.max_output,
3961	"temperature": self.temperature})
3962
3963	try:
3964
3965	output = json.loads(output_raw.text)
3966
3967	# will attempt to unpack logits - but catch any exceptions and skip
3968	if "logits" in output:
3969	try:
3970	logits = ast.literal_eval(output["logits"])
3971	output["logits"] = logits
3972	except:
3973	output["logits"] = []
3974
3975	# will attempt to unpack output tokens - but catch any exceptions and skip
3976	if "output_tokens" in output:
3977	try:
3978	# alt: ot_int = [int(x) for x in output["output_tokens"]]
3979	# alt: output["output_tokens"] = ot_int
3980	output_tokens = ast.literal_eval(output["output_tokens"])
3981	output["output_tokens"] = output_tokens
3982	except:
3983	output["output_tokens"] = []
3984
3985	except:
3986	logger.warning("warning: api inference was not successful")
3987	output = {"llm_response": "api-inference-error", "usage": {}}
3988
3989	# output inference parameters
3990	self.llm_response = output["llm_response"]
3991	self.usage = output["usage"]
3992	self.final_prompt = prompt
3993
3994	if "logits" in output:
3995	self.logits = output["logits"]
3996	if "output_tokens" in output:
3997	self.output_tokens = output["output_tokens"]
3998
3999	self.register()

Callers 2

inferenceMethod · 0.95

streamMethod · 0.95

Calls 3

previewMethod · 0.80

formatMethod · 0.80

registerMethod · 0.80

Tested by

no test coverage detected