Handle chat within a specific session. Intelligently route each message to either the direct LLM (fast) or the RAG pipeline (document-aware).
(self, session_id: str)
| 270 | }, status_code=500) |
| 271 | |
| 272 | def handle_session_chat(self, session_id: str): |
| 273 | """ |
| 274 | Handle chat within a specific session. |
| 275 | Intelligently routes between direct LLM (fast) and RAG pipeline (document-aware). |
| 276 | """ |
| 277 | try: |
| 278 | session = db.get_session(session_id) |
| 279 | if not session: |
| 280 | self.send_json_response({"error": "Session not found"}, status_code=404) |
| 281 | return |
| 282 | |
| 283 | content_length = int(self.headers['Content-Length']) |
| 284 | post_data = self.rfile.read(content_length) |
| 285 | data = json.loads(post_data.decode('utf-8')) |
| 286 | message = data.get('message', '') |
| 287 | |
| 288 | if not message: |
| 289 | self.send_json_response({"error": "Message is required"}, status_code=400) |
| 290 | return |
| 291 | |
| 292 | if session['message_count'] == 0: |
| 293 | title = generate_session_title(message) |
| 294 | db.update_session_title(session_id, title) |
| 295 | |
| 296 | # Add user message to database first |
| 297 | user_message_id = db.add_message(session_id, message, "user") |
| 298 | |
| 299 | # 🎯 SMART ROUTING: Decide between direct LLM vs RAG |
| 300 | idx_ids = db.get_indexes_for_session(session_id) |
| 301 | force_rag = bool(data.get("force_rag", False)) |
| 302 | use_rag = True if force_rag else self._should_use_rag(message, idx_ids) |
| 303 | |
| 304 | if use_rag: |
| 305 | # 🔍 --- Use RAG Pipeline for Document-Related Queries --- |
| 306 | print(f"🔍 Using RAG pipeline for document query: '{message[:50]}...'") |
| 307 | response_text, source_docs = self._handle_rag_query(session_id, message, data, idx_ids) |
| 308 | else: |
| 309 | # ⚡ --- Use Direct LLM for General Queries (FAST) --- |
| 310 | print(f"⚡ Using direct LLM for general query: '{message[:50]}...'") |
| 311 | response_text, source_docs = self._handle_direct_llm_query(session_id, message, session) |
| 312 | |
| 313 | # Add AI response to database |
| 314 | ai_message_id = db.add_message(session_id, response_text, "assistant") |
| 315 | |
| 316 | updated_session = db.get_session(session_id) |
| 317 | |
| 318 | # Send response with proper error handling |
| 319 | self.send_json_response({ |
| 320 | "response": response_text, |
| 321 | "session": updated_session, |
| 322 | "source_documents": source_docs, |
| 323 | "used_rag": use_rag |
| 324 | }) |
| 325 | |
| 326 | except BrokenPipeError: |
| 327 | # Client disconnected - this is normal for long queries, just log it |
| 328 | print(f"⚠️ Client disconnected during RAG processing for query: '{message[:30]}...'") |
| 329 | except json.JSONDecodeError: |
no test coverage detected