Get embeddings status for all chunks of the specified document
(document_id: int)
| 232 | |
| 233 | @document_bp.route("/documents/<int:document_id>/chunk/embedding", methods=["GET"]) |
| 234 | def get_document_embeddings(document_id: int): |
| 235 | """Get embeddings status for all chunks of the specified document""" |
| 236 | try: |
| 237 | # Get query parameters, determine whether to return complete embedding vectors |
| 238 | include_vectors = request.args.get("include_vectors", "").lower() == "true" |
| 239 | |
| 240 | chunks = document_service.get_document_chunks(document_id) |
| 241 | if not chunks: |
| 242 | return jsonify( |
| 243 | APIResponse.error(message=f"No chunks found for document {document_id}") |
| 244 | ) |
| 245 | |
| 246 | # Get embeddings from ChromaDB |
| 247 | chunk_embeddings = document_service.get_chunk_embeddings_by_document_id( |
| 248 | document_id |
| 249 | ) |
| 250 | |
| 251 | chunks_info = [ |
| 252 | { |
| 253 | "id": chunk.id, |
| 254 | "content": chunk.content[:100] + "..." |
| 255 | if len(chunk.content) > 100 |
| 256 | else chunk.content, |
| 257 | "has_embedding": chunk.has_embedding, |
| 258 | "embedding_length": len(chunk_embeddings.get(chunk.id, [])) |
| 259 | if chunk_embeddings.get(chunk.id) |
| 260 | else 0, |
| 261 | "embedding_vector": chunk_embeddings.get(chunk.id) |
| 262 | if include_vectors |
| 263 | else None, # Decide whether to include vectors based on parameters |
| 264 | "tags": chunk.tags, |
| 265 | "topic": chunk.topic, |
| 266 | } |
| 267 | for chunk in chunks |
| 268 | ] |
| 269 | |
| 270 | return jsonify( |
| 271 | APIResponse.success( |
| 272 | data={ |
| 273 | "document_id": document_id, |
| 274 | "total_chunks": len(chunks), |
| 275 | "chunks_with_embeddings": len( |
| 276 | [c for c in chunks if c.has_embedding] |
| 277 | ), |
| 278 | "chunks": chunks_info, |
| 279 | } |
| 280 | ) |
| 281 | ) |
| 282 | |
| 283 | except Exception as e: |
| 284 | logger.error( |
| 285 | f"Error getting embeddings for document {document_id}: {str(e)}", |
| 286 | exc_info=True, |
| 287 | ) |
| 288 | return jsonify( |
| 289 | APIResponse.error( |
| 290 | message=f"Error getting embeddings for document {document_id}: {str(e)}" |
| 291 | ) |
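Nothing calls this function directly; it is registered as a route handler through the `@document_bp.route` decorator.
No test coverage was detected for this function.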