MCPcopy
hub / github.com/SciPhi-AI/R2R / test_document_chunking

Function test_document_chunking

py/tests/unit/document/test_document_processing.py:54–95  ·  view source on GitHub ↗

Test document chunking functionality.

(mock_document_handler, sample_document)

Source from the content-addressed store, hash-verified

52
@pytest.mark.asyncio
async def test_document_chunking(mock_document_handler, sample_document):
    """Test document chunking functionality.

    Replaces ``chunk_document`` on a ``DocumentProcessingService`` instance
    with a ``MagicMock`` that returns three canned chunks, runs
    ``process_document`` on the sample document, and checks that the mock was
    called exactly once and that the processed document carries three chunks
    flagged with ``auto_chunk`` metadata.

    Args:
        mock_document_handler: Fixture passed to the service as its
            ``document_handler``.
        sample_document: Fixture document; its ``document_id`` is stamped on
            every canned chunk and it is the input to ``process_document``.
    """
    from core.main.services.documents import DocumentProcessingService

    # Setup the chunking service with mocked components
    service = DocumentProcessingService(document_handler=mock_document_handler)

    # Canned chunker output: one chunk per sentence, ids numbered from 1.
    chunk_texts = (
        "Aristotle was a Greek philosopher.",
        "He studied under Plato.",
        "He made significant contributions to logic, ethics, and metaphysics.",
    )
    canned_chunks = [
        DocumentChunk(
            chunk_id=f"new-chunk-{index}",
            document_id=sample_document.document_id,
            text=text,
            metadata={"auto_chunk": True},
        )
        for index, text in enumerate(chunk_texts, start=1)
    ]

    # Mock the chunking method, remembering the real one so it can be put back.
    original_chunk_method = service.chunk_document
    service.chunk_document = MagicMock(return_value=canned_chunks)
    try:
        # Process the document
        processed_doc = await service.process_document(sample_document)

        # Verify chunking was called
        service.chunk_document.assert_called_once()

        # Check that document was updated with new chunks
        assert len(processed_doc.chunks) == 3
        assert all(
            chunk.metadata.get("auto_chunk") for chunk in processed_doc.chunks
        )
    finally:
        # Restore original method even when processing or an assertion fails,
        # so the patched attribute never outlives this test body.
        service.chunk_document = original_chunk_method
96
97
98@pytest.mark.asyncio

Callers

nothing calls this directly

Calls 2

DocumentChunkClass · 0.85
getMethod · 0.45

Tested by

no test coverage detected