MCPcopy Index your code
hub / github.com/clips/pattern / PDFParser

Class PDFParser

pattern/web/pdf/pdfparser.py:581–755  ·  view source on GitHub ↗

PDFParser fetches PDF objects from a file stream. It can handle indirect references by referring to a PDF document set by the set_document method. It also reads the XRefs at the end of every PDF file. Typical usage: parser = PDFParser(fp); parser.read_xref(); parser.set_document(doc); parser.seek(offset); parser.nextobject()

Source from the content-addressed store, hash-verified

579## PDFParser
580##
581class PDFParser(PSStackParser):
582
583 """
584 PDFParser fetch PDF objects from a file stream.
585 It can handle indirect references by referring to
586 a PDF document set by set_document method.
587 It also reads XRefs at the end of every PDF file.
588
589 Typical usage:
590 parser = PDFParser(fp)
591 parser.read_xref()
592 parser.set_document(doc)
593 parser.seek(offset)
594 parser.nextobject()
595
596 """
597
598 def __init__(self, fp):
599 PSStackParser.__init__(self, fp)
600 self.doc = None
601 self.fallback = False
602 return
603
604 def set_document(self, doc):
605 """Associates the parser with a PDFDocument object."""
606 self.doc = doc
607 return
608
609 KEYWORD_R = KWD('R')
610 KEYWORD_NULL = KWD('null')
611 KEYWORD_ENDOBJ = KWD('endobj')
612 KEYWORD_STREAM = KWD('stream')
613 KEYWORD_XREF = KWD('xref')
614 KEYWORD_STARTXREF = KWD('startxref')
615 def do_keyword(self, pos, token):
616 """Handles PDF-related keywords."""
617
618 if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
619 self.add_results(*self.pop(1))
620
621 elif token is self.KEYWORD_ENDOBJ:
622 self.add_results(*self.pop(4))
623
624 elif token is self.KEYWORD_NULL:
625 # null object
626 self.push((pos, None))
627
628 elif token is self.KEYWORD_R:
629 # reference to indirect object
630 try:
631 ((_,objid), (_,genno)) = self.pop(2)
632 (objid, genno) = (int(objid), int(genno))
633 obj = PDFObjRef(self.doc, objid, genno)
634 self.push((pos, obj))
635 except PSSyntaxError:
636 pass
637
638 elif token is self.KEYWORD_STREAM:

Callers 1

process_pdfFunction · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…