Enhanced indexing function with real-time progress tracking
(file_paths: List[str], session_id: str)
| 143 | ServerSentEventsHandler.send_event(self.session_id, "progress", event_data) |
| 144 | |
| 145 | def run_indexing_with_progress(file_paths: List[str], session_id: str): |
| 146 | """Enhanced indexing function with real-time progress tracking""" |
| 147 | from rag_system.pipelines.indexing_pipeline import IndexingPipeline |
| 148 | from rag_system.utils.ollama_client import OllamaClient |
| 149 | import json |
| 150 | |
| 151 | try: |
| 152 | # Send initial status |
| 153 | ServerSentEventsHandler.send_event(session_id, "status", { |
| 154 | "message": "Initializing indexing pipeline...", |
| 155 | "session_id": session_id |
| 156 | }) |
| 157 | |
| 158 | # Load configuration |
| 159 | config_file = "batch_indexing_config.json" |
| 160 | try: |
| 161 | with open(config_file, 'r') as f: |
| 162 | config = json.load(f) |
| 163 | except FileNotFoundError: |
| 164 | # Fallback to default config |
| 165 | config = { |
| 166 | "embedding_model_name": "Qwen/Qwen3-Embedding-0.6B", |
| 167 | "indexing": { |
| 168 | "embedding_batch_size": 50, |
| 169 | "enrichment_batch_size": 10, |
| 170 | "enable_progress_tracking": True |
| 171 | }, |
| 172 | "contextual_enricher": {"enabled": True, "window_size": 1}, |
| 173 | "retrievers": { |
| 174 | "dense": {"enabled": True, "lancedb_table_name": "default_text_table"}, |
| 175 | "bm25": {"enabled": True, "index_name": "default_bm25_index"} |
| 176 | }, |
| 177 | "storage": { |
| 178 | "chunk_store_path": "./index_store/chunks/chunks.pkl", |
| 179 | "lancedb_uri": "./index_store/lancedb", |
| 180 | "bm25_path": "./index_store/bm25" |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | # Initialize components |
| 185 | ollama_client = OllamaClient() |
| 186 | ollama_config = { |
| 187 | "generation_model": "llama3.2:1b", |
| 188 | "embedding_model": "mxbai-embed-large" |
| 189 | } |
| 190 | |
| 191 | # Create enhanced pipeline |
| 192 | pipeline = IndexingPipeline(config, ollama_client, ollama_config) |
| 193 | |
| 194 | # Create progress tracker for the overall process |
| 195 | total_steps = 6 # Rough estimate of pipeline steps |
| 196 | step_tracker = RealtimeProgressTracker(total_steps, "Document Indexing", session_id) |
| 197 | |
| 198 | with timer("Complete Indexing Pipeline"): |
| 199 | try: |
| 200 | # Step 1: Document Processing |
| 201 | step_tracker.update(1, current_step="Processing documents...") |
| 202 |
no test coverage detected