MCPcopy
hub / github.com/PromtEngineer/localGPT / run_indexing_with_progress

Function run_indexing_with_progress

rag_system/api_server_with_progress.py:145–236  ·  view source on GitHub ↗

Enhanced indexing function with real-time progress tracking

(file_paths: List[str], session_id: str)

Source from the content-addressed store, hash-verified

143 ServerSentEventsHandler.send_event(self.session_id, "progress", event_data)
144
145def run_indexing_with_progress(file_paths: List[str], session_id: str):
146 """Enhanced indexing function with real-time progress tracking"""
147 from rag_system.pipelines.indexing_pipeline import IndexingPipeline
148 from rag_system.utils.ollama_client import OllamaClient
149 import json
150
151 try:
152 # Send initial status
153 ServerSentEventsHandler.send_event(session_id, "status", {
154 "message": "Initializing indexing pipeline...",
155 "session_id": session_id
156 })
157
158 # Load configuration
159 config_file = "batch_indexing_config.json"
160 try:
161 with open(config_file, 'r') as f:
162 config = json.load(f)
163 except FileNotFoundError:
164 # Fallback to default config
165 config = {
166 "embedding_model_name": "Qwen/Qwen3-Embedding-0.6B",
167 "indexing": {
168 "embedding_batch_size": 50,
169 "enrichment_batch_size": 10,
170 "enable_progress_tracking": True
171 },
172 "contextual_enricher": {"enabled": True, "window_size": 1},
173 "retrievers": {
174 "dense": {"enabled": True, "lancedb_table_name": "default_text_table"},
175 "bm25": {"enabled": True, "index_name": "default_bm25_index"}
176 },
177 "storage": {
178 "chunk_store_path": "./index_store/chunks/chunks.pkl",
179 "lancedb_uri": "./index_store/lancedb",
180 "bm25_path": "./index_store/bm25"
181 }
182 }
183
184 # Initialize components
185 ollama_client = OllamaClient()
186 ollama_config = {
187 "generation_model": "llama3.2:1b",
188 "embedding_model": "mxbai-embed-large"
189 }
190
191 # Create enhanced pipeline
192 pipeline = IndexingPipeline(config, ollama_client, ollama_config)
193
194 # Create progress tracker for the overall process
195 total_steps = 6 # Rough estimate of pipeline steps
196 step_tracker = RealtimeProgressTracker(total_steps, "Document Indexing", session_id)
197
198 with timer("Complete Indexing Pipeline"):
199 try:
200 # Step 1: Document Processing
201 step_tracker.update(1, current_step="Processing documents...")
202

Callers 1

run_indexing_thread — Method · 0.85

Calls 8

update — Method · 0.95
run — Method · 0.95
finish — Method · 0.95
OllamaClient — Class · 0.90
IndexingPipeline — Class · 0.90
timer — Function · 0.90
send_event — Method · 0.80

Tested by

no test coverage detected