MCPcopy
hub / github.com/mindverse/Second-Me / process_all_chunks

Function process_all_chunks

lpm_kernel/api/domains/documents/routes.py:144–191  ·  view source on GitHub ↗

Process chunks for all documents in batch

()

Source from the content-addressed store, hash-verified

142
@document_bp.route("/documents/chunks/process", methods=["POST"])
def process_all_chunks():
    """Process chunks for all documents in batch.

    Builds a ``DocumentChunker`` from the ``DOCUMENT_CHUNK_SIZE`` and
    ``DOCUMENT_CHUNK_OVERLAP`` config values, splits the ``raw_content`` of
    every document returned by ``document_service.list_documents()`` and
    saves each resulting chunk through ``ChunkService.save_chunk``.

    Documents are handled independently: one that has no content, or that
    raises while being split or saved, is counted as failed and the batch
    continues with the next document.

    Returns:
        The jsonified ``APIResponse.success`` payload carrying the
        ``total``, ``processed`` and ``failed`` counts, or the jsonified
        ``APIResponse.error`` payload when setup or listing the documents
        raises. No explicit HTTP status code is set in either case.
    """
    try:
        config = Config.from_env()
        # int() raises if either setting is missing or non-numeric; that is
        # reported through the outer handler as a batch-level failure.
        chunker = DocumentChunker(
            chunk_size=int(config.get("DOCUMENT_CHUNK_SIZE")),
            overlap=int(config.get("DOCUMENT_CHUNK_OVERLAP")),
        )

        documents = document_service.list_documents()
        processed, failed = 0, 0

        chunk_service = ChunkService()
        for doc in documents:
            try:
                if not doc.raw_content:
                    # Empty documents are reported under "failed", not
                    # silently dropped from the totals.
                    logger.warning(f"Document {doc.id} has no content, skipping...")
                    failed += 1
                    continue

                # Split into chunks and save
                chunks = chunker.split(doc.raw_content)
                for chunk in chunks:
                    chunk.document_id = doc.id
                    chunk_service.save_chunk(chunk)

                processed += 1
                logger.info(
                    f"Document {doc.id} processed: {len(chunks)} chunks created"
                )

            except Exception as e:
                # Keep the batch going, but record the traceback so the
                # failing document can actually be diagnosed from the logs.
                # NOTE(review): chunks saved before the failure are not
                # undone by this handler - confirm whether ChunkService
                # handles that.
                logger.error(
                    f"Failed to process document {doc.id}: {e}", exc_info=True
                )
                failed += 1

        return jsonify(
            APIResponse.success(
                data={
                    "total": len(documents),
                    "processed": processed,
                    "failed": failed,
                }
            )
        )

    except Exception as e:
        # Top-level boundary: anything outside the per-document handling
        # (config, chunker construction, listing documents) lands here.
        logger.error(f"Chunk processing failed: {e}", exc_info=True)
        return jsonify(APIResponse.error(message=f"Chunk processing failed: {e}"))
192
193
194@document_bp.route("/documents/<int:document_id>/chunk/embedding", methods=["POST"])

Callers

nothing calls this directly

Calls 9

split · Method · 0.95
save_chunk · Method · 0.95
DocumentChunker · Class · 0.90
ChunkService · Class · 0.90
from_env · Method · 0.80
error · Method · 0.80
success · Method · 0.80
get · Method · 0.45
list_documents · Method · 0.45

Tested by

no test coverage detected