(self, query: str, history: list)
| 169 | |
| 170 | # ---------------- Asynchronous triage using Ollama ---------------- |
async def _triage_query_async(self, query: str, history: list) -> str:
    """Decide which backend should handle ``query``.

    Routing is attempted in three stages; the first stage that produces a
    decision wins:

    1. ``self._route_via_overviews(query)`` -- any truthy result is returned
       unchanged.
    2. A non-empty ``history`` -- the query is treated as a follow-up and
       routed to ``"rag_query"``.
    3. LLM fallback -- the model named by
       ``self.ollama_config["generation_model"]`` is asked to classify the
       query and to answer with JSON of the form ``{"category": "..."}``.

    Args:
        query: The user's question.
        history: Prior conversation turns; only its truthiness is used here.

    Returns:
        The routing category. The LLM fallback only ever returns one of
        ``"rag_query"``, ``"direct_answer"`` or ``"graph_query"``; a reply
        that is not valid JSON, is not a JSON object, or names any other
        category falls back to ``"rag_query"``.
    """
    valid_categories = ("rag_query", "direct_answer", "graph_query")
    default_category = "rag_query"

    print(f"๐ ROUTING DEBUG: Starting triage for query: '{query[:100]}...'")

    # Stage 1: fast routing using precomputed overviews (if available).
    print("๐ ROUTING DEBUG: Attempting overview-based routing...")
    routed = self._route_via_overviews(query)
    if routed:
        print(f"โ ROUTING DEBUG: Overview routing decided: '{routed}'")
        return routed
    print("โ ROUTING DEBUG: Overview routing returned None, falling back to LLM triage")

    # Stage 2: with history the query is likely a follow-up, so default to
    # RAG. A more advanced implementation could ask an LLM whether the new
    # query changes the topic entirely.
    if history:
        print("๐ ROUTING DEBUG: History exists, defaulting to 'rag_query'")
        return default_category

    # Stage 3: ask the LLM to classify the query.
    print("๐ค ROUTING DEBUG: No history, using LLM fallback triage...")
    prompt = f"""
You are a query routing expert. Analyze the user's question and decide which backend should handle it.

Choose **exactly one** category:

1. "rag_query" โ Questions about the user's uploaded documents or specific document content that should be searched. Examples: "What is the invoice amount?", "Summarize the research paper", "What companies are mentioned?"

2. "direct_answer" โ General knowledge questions, greetings, or queries unrelated to uploaded documents. Examples: "Who are the CEOs of Tesla and Amazon?", "What is the capital of France?", "Hello", "Explain quantum physics"

3. "graph_query" โ Specific factual relations for knowledge-graph lookup (currently limited use)

IMPORTANT: For general world knowledge about well-known companies, people, or facts NOT related to uploaded documents, choose "direct_answer".

User query: "{query}"

Respond with JSON: {{"category": "<your_choice>"}}
"""
    # NOTE(review): this call is not awaited; if generate_completion performs
    # blocking I/O it will stall the event loop for its duration -- confirm
    # against the client implementation.
    resp = self.llm_client.generate_completion(
        model=self.ollama_config["generation_model"], prompt=prompt, format="json"
    )

    try:
        # TypeError covers a "response" value that is None or otherwise not
        # str/bytes, which json.loads rejects before it even starts parsing.
        data = json.loads(resp.get("response", "{}"))
    except (json.JSONDecodeError, TypeError):
        print("โ ROUTING DEBUG: LLM fallback triage JSON parsing failed, defaulting to 'rag_query'")
        return default_category

    # The model may emit valid JSON that is not an object (e.g. a list), or an
    # object whose category is not one we can route to; never pass those on.
    decision = data.get("category", default_category) if isinstance(data, dict) else None
    if isinstance(decision, str):
        decision = decision.strip().lower()
    if decision not in valid_categories:
        print(
            f"โ ROUTING DEBUG: LLM fallback triage returned unsupported category "
            f"{decision!r}, defaulting to 'rag_query'"
        )
        return default_category

    print(f"๐ค ROUTING DEBUG: LLM fallback triage decided: '{decision}'")
    return decision
| 220 | |
| 221 | def _run_graph_query(self, query: str, history: list) -> Dict[str, Any]: |
| 222 | contextual_query = self._format_query_with_history(query, history) |
no test coverage detected