PDFParser fetches PDF objects from a file stream. It can handle indirect references by referring to a PDF document set by the set_document method. It also reads XRefs at the end of every PDF file. Typical usage: parser = PDFParser(fp); parser.read_xref(); parser.set_document(doc); parser.seek(offset); parser.nextobject()
| 579 | ## PDFParser |
| 580 | ## |
| 581 | class PDFParser(PSStackParser): |
| 582 | |
| 583 | """ |
| 584 | PDFParser fetches PDF objects from a file stream. |
| 585 | It can handle indirect references by referring to |
| 586 | a PDF document set by set_document method. |
| 587 | It also reads XRefs at the end of every PDF file. |
| 588 | |
| 589 | Typical usage: |
| 590 | parser = PDFParser(fp) |
| 591 | parser.read_xref() |
| 592 | parser.set_document(doc) |
| 593 | parser.seek(offset) |
| 594 | parser.nextobject() |
| 595 | |
| 596 | """ |
| 597 | |
def __init__(self, fp):
    """Create a parser that reads from the file stream *fp*.

    The PSStackParser initializer is run on *fp* first. The new parser
    has no associated document yet (``doc`` is None until set_document
    is called) and its ``fallback`` flag starts out False.
    """
    PSStackParser.__init__(self, fp)
    # No document is attached at construction time.
    self.doc = None
    self.fallback = False
| 603 | |
def set_document(self, doc):
    """Attach a PDFDocument object to this parser.

    *doc* is stored on ``self.doc``, which do_keyword passes to
    PDFObjRef when it builds a reference to an indirect object.
    Returns None.
    """
    self.doc = doc
| 608 | |
# Class-level keyword tokens built with KWD. do_keyword compares each
# incoming token against these (by identity or tuple membership) to
# decide how to handle it.
KEYWORD_R = KWD('R')  # reference to an indirect object
KEYWORD_NULL = KWD('null')  # null object
KEYWORD_ENDOBJ = KWD('endobj')
KEYWORD_STREAM = KWD('stream')
KEYWORD_XREF = KWD('xref')
KEYWORD_STARTXREF = KWD('startxref')
| 615 | def do_keyword(self, pos, token): |
| 616 | """Handles PDF-related keywords.""" |
| 617 | |
| 618 | if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): |
| 619 | self.add_results(*self.pop(1)) |
| 620 | |
| 621 | elif token is self.KEYWORD_ENDOBJ: |
| 622 | self.add_results(*self.pop(4)) |
| 623 | |
| 624 | elif token is self.KEYWORD_NULL: |
| 625 | # null object |
| 626 | self.push((pos, None)) |
| 627 | |
| 628 | elif token is self.KEYWORD_R: |
| 629 | # reference to indirect object |
| 630 | try: |
| 631 | ((_,objid), (_,genno)) = self.pop(2) |
| 632 | (objid, genno) = (int(objid), int(genno)) |
| 633 | obj = PDFObjRef(self.doc, objid, genno) |
| 634 | self.push((pos, obj)) |
| 635 | except PSSyntaxError: |
| 636 | pass |
| 637 | |
| 638 | elif token is self.KEYWORD_STREAM: |
no outgoing calls
no test coverage detected
searching dependent graphs…