MCPcopy
hub / github.com/ArtifexSoftware/pdf2docx / test_one

Function test_one

test/test.py:335–410  ·  view source on GitHub ↗

Check the quality of converted docx.

(path)

Source from the content-addressed store, hash-verified

333
334@pytest.mark.parametrize('path', g_paths)
335def test_one(path):
336 '''Check the quality of converted docx.
337 '''
338
339 # Where there are two values, they are (sidx_required_word,
340 # sidx_required_libreoffice).
341 #
342 docx_to_sidx_required = {
343 'demo-blank.pdf': 1.0,
344 'demo-image-cmyk.pdf': 0.90,
345 'demo-image-transparent.pdf': 0.90,
346 'demo-image-vector-graphic.pdf': (0.89, 0.68),
347 'demo-image.pdf': 0.90,
348 'demo-image-rotation.pdf': (0.90, 0.82),
349 'demo-image-overlap.pdf': (0.90, 0.70),
350 'demo-path-transformation.pdf': (0.89, 0.60),
351 'demo-section-spacing.pdf': (0.90, 0.86),
352 'demo-section.pdf': (0.70, 0.45),
353 'demo-table-align-borders.pdf': 0.49,
354 'demo-table-border-style.pdf': (0.90, 0.89),
355 'demo-table-bottom.pdf': 0.90,
356 'demo-table-close-underline.pdf': (0.57, 0.49),
357 'demo-table-lattice-one-cell.pdf': (0.79, 0.75),
358 'demo-table-lattice.pdf': (0.75, 0.59),
359 'demo-table-nested.pdf': 0.84,
360 'demo-table-shading-highlight.pdf': (0.55, 0.45),
361 'demo-table-shading.pdf': (0.80, 0.60),
362 'demo-table-stream.pdf': 0.55,
363 'demo-table.pdf': (0.90, 0.75),
364 'demo-text-alignment.pdf': (0.90, 0.86),
365 'demo-text-scaling.pdf': (0.80, 0.65),
366 'demo-text-unnamed-fonts.pdf': (0.80, 0.77),
367 'demo-text-hidden.pdf': 0.90,
368 'demo-text.pdf': 0.80,
369 'pdf2docx-lists-bullets3.docx': (0.98, 0.99),
370 }
371
372 print(f'# Looking at: {path}')
373 path = f'{root_path}/{path}'
374 path_leaf = os.path.basename(path)
375 _, ext = os.path.splitext(path)
376 if ext == '.docx':
377 pdf = f'{path}.pdf'
378 document_to(path, pdf)
379 else:
380 pdf = path
381 docx2 = f'{pdf}.docx'
382 pages = None
383 if os.path.basename(path) == 'demo-whisper_2_3.pdf':
384 pages = [25, 26, 27]
385 else:
386 with fitz.Document(pdf) as doc:
387 if len(doc) > 1:
388 print(f'Not testing because more than one page: {path}')
389 return
390 #print(f'Calling parse() {pdf=} {docx2=}')
391 parse(pdf, docx2, pages=pages, raw_exceptions=True)
392 assert os.path.isfile(docx2)

Callers

nothing calls this directly

Calls 3

document_to — Function · 0.85
compare_pdf — Function · 0.85
get — Method · 0.80

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…