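Generate the LLM's response to a prompt. Args: message (str | ChatDocument): the prompt, given either as a string or as a ChatDocument object. Returns: the response from the LLM, packaged as a ChatDocument, or None if no message is given or the LLM cannot respond to it.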
(
self,
message: Optional[str | ChatDocument] = None,
)
| 1204 | |
| 1205 | @no_type_check |
| 1206 | def llm_response( |
| 1207 | self, |
| 1208 | message: Optional[str | ChatDocument] = None, |
| 1209 | ) -> Optional[ChatDocument]: |
| 1210 | """ |
| 1211 | LLM response to a prompt. |
| 1212 | Args: |
| 1213 | message (str|ChatDocument): prompt string, or ChatDocument object |
| 1214 | |
| 1215 | Returns: |
| 1216 | Response from LLM, packaged as a ChatDocument |
| 1217 | """ |
| 1218 | if message is None or not self.llm_can_respond(message): |
| 1219 | return None |
| 1220 | |
| 1221 | if isinstance(message, ChatDocument): |
| 1222 | prompt = message.content |
| 1223 | else: |
| 1224 | prompt = message |
| 1225 | |
| 1226 | with ExitStack() as stack: # for conditionally using rich spinner |
| 1227 | if not self.llm.get_stream(): |
| 1228 | # show rich spinner only if not streaming! |
| 1229 | cm = status("LLM responding to message...") |
| 1230 | stack.enter_context(cm) |
| 1231 | output_len = self.config.llm.model_max_output_tokens |
| 1232 | if ( |
| 1233 | self.num_tokens(prompt) + output_len |
| 1234 | > self.llm.completion_context_length() |
| 1235 | ): |
| 1236 | output_len = self.llm.completion_context_length() - self.num_tokens( |
| 1237 | prompt |
| 1238 | ) |
| 1239 | if output_len < self.config.llm.min_output_tokens: |
| 1240 | raise ValueError( |
| 1241 | """ |
| 1242 | Token-length of Prompt + Output is longer than the |
| 1243 | completion context length of the LLM! |
| 1244 | """ |
| 1245 | ) |
| 1246 | else: |
| 1247 | logger.warning( |
| 1248 | f""" |
| 1249 | Requested output length has been shortened to {output_len} |
| 1250 | so that the total length of Prompt + Output is less than |
| 1251 | the completion context length of the LLM. |
| 1252 | """ |
| 1253 | ) |
| 1254 | if self.llm.get_stream() and not settings.quiet: |
| 1255 | console.print(f"[green]{self.indent}", end="") |
| 1256 | response = self.llm.generate(prompt, output_len) |
| 1257 | |
| 1258 | if not self.llm.get_stream() or response.cached and not settings.quiet: |
| 1259 | # we would have already displayed the msg "live" ONLY if |
| 1260 | # streaming was enabled, AND we did not find a cached response |
| 1261 | # If we are here, it means the response has not yet been displayed. |
| 1262 | cached = "[red](cached)[/red]" if response.cached else "" |
| 1263 | console.print(f"[green]{self.indent}", end="") |