MCPcopy
hub / github.com/PromtEngineer/localGPT / handle_index

Method handle_index

rag_system/api_server.py:501–690  ·  view source on GitHub ↗

Triggers the document indexing pipeline for specific files.

(self)

Source from the content-addressed store, hash-verified

499 self.send_json_response({"error": f"Server error: {str(e)}"}, status_code=500)
500
501 def handle_index(self):
502 """Triggers the document indexing pipeline for specific files."""
503 try:
504 content_length = int(self.headers['Content-Length'])
505 post_data = self.rfile.read(content_length)
506 data = json.loads(post_data.decode('utf-8'))
507
508 file_paths = data.get('file_paths')
509 session_id = data.get('session_id')
510 compose_flag = data.get('compose_sub_answers')
511 decomp_flag = data.get('query_decompose')
512 ai_rerank_flag = data.get('ai_rerank')
513 ctx_expand_flag = data.get('context_expand')
514 enable_latechunk = bool(data.get("enable_latechunk", False))
515 enable_docling_chunk = bool(data.get("enable_docling_chunk", False))
516
517 # 🆕 NEW CONFIGURATION OPTIONS:
518 chunk_size = int(data.get("chunk_size", 512))
519 chunk_overlap = int(data.get("chunk_overlap", 64))
520 retrieval_mode = data.get("retrieval_mode", "hybrid")
521 window_size = int(data.get("window_size", 2))
522 enable_enrich = bool(data.get("enable_enrich", True))
523 embedding_model = data.get('embeddingModel')
524 enrich_model = data.get('enrichModel')
525 overview_model = data.get('overviewModel') or data.get('overview_model_name')
526 batch_size_embed = int(data.get("batch_size_embed", 50))
527 batch_size_enrich = int(data.get("batch_size_enrich", 25))
528
529 if not file_paths or not isinstance(file_paths, list):
530 self.send_json_response({
531 "error": "A 'file_paths' list is required."
532 }, status_code=400)
533 return
534
535 # Allow explicit table_name override
536 table_name = data.get('table_name')
537 if not table_name and session_id:
538 table_name = _get_table_name_for_session(session_id)
539
540 # The INDEXING_PIPELINE is already initialized. We just need to use it.
541 # If a session-specific table is needed, we can override the config for this run.
542 if table_name:
543 import copy
544 config_override = copy.deepcopy(INDEXING_PIPELINE.config)
545 config_override["storage"]["text_table_name"] = table_name
546 config_override.setdefault("retrievers", {}).setdefault("dense", {})["lancedb_table_name"] = table_name
547
548 # 🔧 Configure late chunking
549 if enable_latechunk:
550 config_override["retrievers"].setdefault("latechunk", {})["enabled"] = True
551 else:
552 # ensure disabled if not requested
553 config_override["retrievers"].setdefault("latechunk", {})["enabled"] = False
554
555 # 🔧 Configure docling chunking
556 if enable_docling_chunk:
557 config_override["chunker_mode"] = "docling"
558

Callers 1

do_POST · Method · 0.95

Calls 4

send_json_response · Method · 0.95
update_index_metadata · Method · 0.80
run · Method · 0.45

Tested by

no test coverage detected