(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
caching=True, check_extractable=True)
class PDFTextExtractionNotAllowed(PDFInterpreterError):
    """Signal that text extraction is not allowed for a document."""
| 811 | |
def process_pdf(rsrcmgr, device, fp, pagenos=None, maxpages=0, password='',
                caching=True, check_extractable=True):
    """Feed the pages of the PDF read from *fp* through a page interpreter.

    A parser is created for *fp*, linked to a new document object, and the
    document is initialized with *password*.  Each selected page is then
    handed to a ``PDFPageInterpreter`` built from *rsrcmgr* and *device*.

    Args:
        rsrcmgr: Resource manager passed to ``PDFPageInterpreter``.
        device: Output device passed to ``PDFPageInterpreter``.
        fp: Object handed to ``PDFParser``; also shown (via ``%r``) in the
            error message when extraction is refused.
        pagenos: Container of zero-based page indices to process.  A falsy
            value (``None``, empty container) selects every page.
        maxpages: When truthy, iteration stops right after a page is
            processed whose one-based position is at least this value.
            Pages skipped because of *pagenos* never trigger the stop.
        password: Passed to ``doc.initialize``; empty string if none is set.
        caching: Forwarded to ``PDFDocument(caching=...)``.
        check_extractable: When true, refuse documents whose
            ``is_extractable`` attribute is falsy.

    Raises:
        PDFTextExtractionNotAllowed: If *check_extractable* is true and the
            document reports that it is not extractable.
    """
    # Build the parser and the document, then wire each one to the other.
    pdf_parser = PDFParser(fp)
    document = PDFDocument(caching=caching)
    pdf_parser.set_document(document)
    document.set_parser(pdf_parser)

    # The password must be supplied before the document can be used.
    document.initialize(password)

    # Honour the document's extraction permission unless told otherwise.
    extraction_refused = check_extractable and not document.is_extractable
    if extraction_refused:
        raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)

    page_interpreter = PDFPageInterpreter(rsrcmgr, device)
    for index, page in enumerate(document.get_pages()):
        # An empty/None selection means "all pages".
        if not pagenos or index in pagenos:
            page_interpreter.process_page(page)
            # The limit is only evaluated after a page was actually processed.
            if maxpages and maxpages <= index + 1:
                break
no test coverage detected
searching dependent graphs…