Triggers the document indexing pipeline for specific files.
(self)
| 499 | self.send_json_response({"error": f"Server error: {str(e)}"}, status_code=500) |
| 500 | |
| 501 | def handle_index(self): |
| 502 | """Triggers the document indexing pipeline for specific files.""" |
| 503 | try: |
| 504 | content_length = int(self.headers['Content-Length']) |
| 505 | post_data = self.rfile.read(content_length) |
| 506 | data = json.loads(post_data.decode('utf-8')) |
| 507 | |
| 508 | file_paths = data.get('file_paths') |
| 509 | session_id = data.get('session_id') |
| 510 | compose_flag = data.get('compose_sub_answers') |
| 511 | decomp_flag = data.get('query_decompose') |
| 512 | ai_rerank_flag = data.get('ai_rerank') |
| 513 | ctx_expand_flag = data.get('context_expand') |
| 514 | enable_latechunk = bool(data.get("enable_latechunk", False)) |
| 515 | enable_docling_chunk = bool(data.get("enable_docling_chunk", False)) |
| 516 | |
| 517 | # 🆕 NEW CONFIGURATION OPTIONS: |
| 518 | chunk_size = int(data.get("chunk_size", 512)) |
| 519 | chunk_overlap = int(data.get("chunk_overlap", 64)) |
| 520 | retrieval_mode = data.get("retrieval_mode", "hybrid") |
| 521 | window_size = int(data.get("window_size", 2)) |
| 522 | enable_enrich = bool(data.get("enable_enrich", True)) |
| 523 | embedding_model = data.get('embeddingModel') |
| 524 | enrich_model = data.get('enrichModel') |
| 525 | overview_model = data.get('overviewModel') or data.get('overview_model_name') |
| 526 | batch_size_embed = int(data.get("batch_size_embed", 50)) |
| 527 | batch_size_enrich = int(data.get("batch_size_enrich", 25)) |
| 528 | |
| 529 | if not file_paths or not isinstance(file_paths, list): |
| 530 | self.send_json_response({ |
| 531 | "error": "A 'file_paths' list is required." |
| 532 | }, status_code=400) |
| 533 | return |
| 534 | |
| 535 | # Allow explicit table_name override |
| 536 | table_name = data.get('table_name') |
| 537 | if not table_name and session_id: |
| 538 | table_name = _get_table_name_for_session(session_id) |
| 539 | |
| 540 | # The INDEXING_PIPELINE is already initialized. We just need to use it. |
| 541 | # If a session-specific table is needed, we can override the config for this run. |
| 542 | if table_name: |
| 543 | import copy |
| 544 | config_override = copy.deepcopy(INDEXING_PIPELINE.config) |
| 545 | config_override["storage"]["text_table_name"] = table_name |
| 546 | config_override.setdefault("retrievers", {}).setdefault("dense", {})["lancedb_table_name"] = table_name |
| 547 | |
| 548 | # 🔧 Configure late chunking |
| 549 | if enable_latechunk: |
| 550 | config_override["retrievers"].setdefault("latechunk", {})["enabled"] = True |
| 551 | else: |
| 552 | # ensure disabled if not requested |
| 553 | config_override["retrievers"].setdefault("latechunk", {})["enabled"] = False |
| 554 | |
| 555 | # 🔧 Configure docling chunking |
| 556 | if enable_docling_chunk: |
| 557 | config_override["chunker_mode"] = "docling" |
| 558 |
no test coverage detected