In the typical case, with raw_mode = False, no prompt engineering is applied; the prompt and context are simply combined using a basic assembly.
(self, prompt, add_context=None, add_prompt_engineering=None, inference_dict=None,
api_key=None)
| 6182 | return output |
| 6183 | |
| 6184 | def inference(self, prompt, add_context=None, add_prompt_engineering=None, inference_dict=None, |
| 6185 | api_key=None): |
| 6186 | |
| 6187 | """ In typical case with raw_mode = False, then no prompt engineering, just apply a basic |
| 6188 | assembly of the prompt and context. """ |
| 6189 | |
| 6190 | self.prompt = prompt |
| 6191 | |
| 6192 | if add_context: |
| 6193 | self.add_context = add_context |
| 6194 | |
| 6195 | if add_prompt_engineering: |
| 6196 | self.add_prompt_engineering = add_prompt_engineering |
| 6197 | |
| 6198 | if inference_dict: |
| 6199 | |
| 6200 | if "temperature" in inference_dict: |
| 6201 | self.temperature = inference_dict["temperature"] |
| 6202 | |
| 6203 | if "max_tokens" in inference_dict: |
| 6204 | self.target_requested_output_tokens = inference_dict["max_tokens"] |
| 6205 | |
| 6206 | # call to preview hook (not implemented by default) |
| 6207 | self.preview() |
| 6208 | |
| 6209 | # default case - pass the prompt received without change |
| 6210 | prompt_enriched = self.prompt |
| 6211 | |
| 6212 | usage = {} |
| 6213 | |
| 6214 | time_start = time.time() |
| 6215 | |
| 6216 | try: |
| 6217 | |
| 6218 | # assumes 'chat' api by default |
| 6219 | |
| 6220 | if self.model_type == "chat": |
| 6221 | |
| 6222 | full_prompt = self.prompt_engineer(prompt_enriched, self.add_context, inference_dict) |
| 6223 | |
| 6224 | messages = [{"role": "user", "content": full_prompt}] |
| 6225 | uri = self.uri + "chat" |
| 6226 | |
| 6227 | response = requests.post(uri, |
| 6228 | json={"model": self.model_name, |
| 6229 | "messages": messages, "stream": self.stream_mode}) |
| 6230 | |
| 6231 | logger.info("update: OllamaModel response - chat - %s ", response.text) |
| 6232 | |
| 6233 | output = json.loads(response.text) |
| 6234 | |
| 6235 | text_out = output["message"]["content"] |
| 6236 | |
| 6237 | pt = 0 |
| 6238 | ct = 0 |
| 6239 | tt = 0 |
| 6240 | |
| 6241 | """ best effort to gather usage data """ |