Interactive processing configuration.
(self)
| 141 | return documents |
| 142 | |
def configure_processing(self) -> dict:
    """Interactively collect the document-processing configuration.

    Prompts through ``self.get_user_input(prompt, default)`` for the chunking
    parameters, three optional processing features, and the embedding and
    generation model names.

    Numeric answers are validated: a non-integer or out-of-range answer is
    reported and the question is asked again, instead of aborting the whole
    session with an unhandled ``ValueError``.  Yes/no answers accept ``y`` or
    ``yes`` in any letter case, ignoring surrounding whitespace; anything
    else counts as "no".

    NOTE(review): ``get_user_input`` is defined outside this block; it is
    presumably returning the default string when the user just presses
    Enter -- confirm against its implementation.

    Returns:
        A dict with the keys ``chunk_size`` and ``chunk_overlap`` (ints),
        ``enable_enrich``, ``enable_latechunk`` and ``enable_docling``
        (bools), ``embedding_model`` and ``generation_model`` (as entered),
        plus the fixed entries ``retrieval_mode="hybrid"`` and
        ``window_size=2``.
    """

    def ask_int(prompt, default, minimum, below=None):
        """Ask until the answer is an int >= minimum (and < below, if given)."""
        while True:
            raw = self.get_user_input(prompt, default)
            try:
                value = int(raw)
            except ValueError:
                print(f"Invalid number: {raw!r}. Please enter a whole number.")
                continue
            if value < minimum:
                print(f"Value must be at least {minimum}.")
                continue
            if below is not None and value >= below:
                print(f"Value must be smaller than {below}.")
                continue
            return value

    def ask_yes_no(prompt, default):
        """Return True when the answer is 'y' or 'yes' (case-insensitive)."""
        answer = self.get_user_input(prompt, default)
        return answer.strip().lower() in ("y", "yes")

    print("\n⚙️ Processing Configuration")
    print("=" * 50)

    print("Configure how documents will be processed:")

    # Basic settings
    chunk_size = ask_int("Chunk size", "512", minimum=1)
    # An overlap that is not strictly smaller than the chunk size would leave
    # no new content per chunk, so it is rejected here.
    chunk_overlap = ask_int("Chunk overlap", "64", minimum=0, below=chunk_size)

    # Advanced settings
    print("\nAdvanced options:")
    enable_enrich = ask_yes_no("Enable contextual enrichment? (y/n)", "y")
    enable_latechunk = ask_yes_no("Enable late chunking? (y/n)", "y")
    enable_docling = ask_yes_no("Enable Docling chunking? (y/n)", "y")

    # Model selection
    print("\nModel Configuration:")
    embedding_model = self.get_user_input("Embedding model", "Qwen/Qwen3-Embedding-0.6B")
    generation_model = self.get_user_input("Generation model", "qwen3:0.6b")

    return {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "enable_enrich": enable_enrich,
        "enable_latechunk": enable_latechunk,
        "enable_docling": enable_docling,
        "embedding_model": embedding_model,
        "generation_model": generation_model,
        # Not asked interactively; always returned with these fixed values.
        "retrieval_mode": "hybrid",
        "window_size": 2,
    }
| 176 | |
| 177 | def create_index_interactive(self) -> None: |
| 178 | """Run the interactive index creation process.""" |
no test coverage detected