(pdflib: str)
| 38 | |
@pytest.mark.parametrize("pdflib", ["unstructured"])
def test_get_pdf_doc_path(pdflib: str):
    """Parse the ``dummy.pdf`` fixture from a local file path.

    Builds a parser via ``DocumentParser.create`` using the PDF library
    named by ``pdflib``, then checks both the whole-document result and
    the chunked result.

    Args:
        pdflib: Value passed as ``library`` to ``PdfParsingConfig``.
    """
    # The fixture lives at <parent of this file's directory>/main/data/dummy.pdf.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.abspath(os.path.join(this_dir, ".."))
    path = os.path.join(parent_dir, "main", "data", "dummy.pdf")

    parsing_config = ParsingConfig(pdf=PdfParsingConfig(library=pdflib))
    parser = DocumentParser.create(path, parsing_config)

    # Whole document: non-empty string content, source recorded as the path.
    whole_doc = parser.get_doc()
    assert isinstance(whole_doc.content, str)
    assert len(whole_doc.content) > 0  # assuming the PDF is not empty
    assert whole_doc.metadata.source == path

    # Chunks: at least one, each flagged as a chunk and mentioning the path
    # somewhere in its source.
    chunks = parser.get_doc_chunks()
    assert len(chunks) > 0
    for chunk in chunks:
        assert chunk.metadata.is_chunk
    for chunk in chunks:
        assert path in chunk.metadata.source
nothing calls this directly
no test coverage detected
searching dependent graphs…