Check the quality of converted docx.
(path)
| 333 | |
@pytest.mark.parametrize('path', g_paths)
def test_one(path):
    '''Convert one sample document with parse() and check the docx exists.

    `path` is taken relative to `root_path`. An input whose extension is
    '.docx' is first handed to document_to() (presumably to render it as a
    PDF - confirm against that helper); any other input is used as the PDF
    directly. PDFs with more than one page are skipped by returning early,
    except 'demo-whisper_2_3.pdf', for which an explicit page list is
    passed to parse().
    '''

    # Per-sample required values, keyed by file name (sidx is presumably a
    # similarity index - confirm against the code that consumes this table;
    # it is not read in the portion of the test shown here).
    # A tuple holds (sidx_required_word, sidx_required_libreoffice); a
    # single number stands alone.
    docx_to_sidx_required = {
        'demo-blank.pdf': 1.0,
        'demo-image-cmyk.pdf': 0.90,
        'demo-image-transparent.pdf': 0.90,
        'demo-image-vector-graphic.pdf': (0.89, 0.68),
        'demo-image.pdf': 0.90,
        'demo-image-rotation.pdf': (0.90, 0.82),
        'demo-image-overlap.pdf': (0.90, 0.70),
        'demo-path-transformation.pdf': (0.89, 0.60),
        'demo-section-spacing.pdf': (0.90, 0.86),
        'demo-section.pdf': (0.70, 0.45),
        'demo-table-align-borders.pdf': 0.49,
        'demo-table-border-style.pdf': (0.90, 0.89),
        'demo-table-bottom.pdf': 0.90,
        'demo-table-close-underline.pdf': (0.57, 0.49),
        'demo-table-lattice-one-cell.pdf': (0.79, 0.75),
        'demo-table-lattice.pdf': (0.75, 0.59),
        'demo-table-nested.pdf': 0.84,
        'demo-table-shading-highlight.pdf': (0.55, 0.45),
        'demo-table-shading.pdf': (0.80, 0.60),
        'demo-table-stream.pdf': 0.55,
        'demo-table.pdf': (0.90, 0.75),
        'demo-text-alignment.pdf': (0.90, 0.86),
        'demo-text-scaling.pdf': (0.80, 0.65),
        'demo-text-unnamed-fonts.pdf': (0.80, 0.77),
        'demo-text-hidden.pdf': 0.90,
        'demo-text.pdf': 0.80,
        'pdf2docx-lists-bullets3.docx': (0.98, 0.99),
    }

    print(f'# Looking at: {path}')
    path = f'{root_path}/{path}'
    path_leaf = os.path.basename(path)
    ext = os.path.splitext(path)[1]

    # Default: the input already is the PDF. A .docx input gets a sibling
    # '<path>.pdf' produced by document_to() instead.
    pdf = path
    if ext == '.docx':
        pdf = f'{path}.pdf'
        document_to(path, pdf)
    docx2 = f'{pdf}.docx'

    pages = None
    if path_leaf == 'demo-whisper_2_3.pdf':
        # The one sample that is converted with an explicit page selection.
        pages = [25, 26, 27]
    else:
        # Every other sample is only tested when it has a single page.
        with fitz.Document(pdf) as doc:
            page_count = len(doc)
        if page_count > 1:
            print(f'Not testing because more than one page: {path}')
            return

    parse(pdf, docx2, pages=pages, raw_exceptions=True)
    assert os.path.isfile(docx2)
nothing calls this directly
no test coverage detected
searching dependent graphs…