Method get_pages

pattern/web/pdf/pdfparser.py:493–519 · view source on GitHub ↗

(self)

Source from the content-addressed store, hash-verified

491
492	INHERITABLE_ATTRS = set(['Resources', 'MediaBox', 'CropBox', 'Rotate'])  # keys get_pages() copies from a parent node into a child node that does not define them itself
def get_pages(self):
    """Generate a PDFPage for every page found under the catalog's page tree.

    The tree is walked depth-first starting at self.catalog['Pages'].
    While descending, every key listed in self.INHERITABLE_ATTRS that a
    node does not define itself is copied down from its parent, so each
    yielded page dictionary already carries its inherited values.

    A node whose 'Type' is neither LITERAL_PAGES (with 'Kids') nor
    LITERAL_PAGE is silently ignored.  A Pages node that shows up again
    among its own descendants is skipped instead of being expanded a
    second time, so a malformed, cyclic tree terminates.

    Yields:
        PDFPage(self, pageid, tree) for each page, in tree order.

    Raises:
        PDFException: if self.xrefs is empty.  Because this is a
            generator, the exception surfaces on the first iteration,
            not at call time.
    """
    if not self.xrefs:
        raise PDFException('PDFDocument is not initialized')

    # Object ids of the Pages nodes lying on the path from the root to
    # the node currently being expanded.  Only the current path is
    # tracked (not every node ever visited) so that acyclic trees
    # produce exactly the same pages as before.
    ancestors = set()

    def search(obj, parent):
        # obj is either a bare object id or a reference carrying .objid.
        if isinstance(obj, int):
            objid = obj
            tree = dict_value(self.getobj(objid)).copy()
        else:
            objid = obj.objid
            tree = dict_value(obj).copy()
        # Work on a copy (above) so inherited values never leak back
        # into the stored object.
        for (k, v) in parent.iteritems():
            if k in self.INHERITABLE_ATTRS and k not in tree:
                tree[k] = v
        if tree.get('Type') is LITERAL_PAGES and 'Kids' in tree:
            if objid in ancestors:
                # This node is its own ancestor: expanding it again
                # would recurse without end.
                if 1 <= self.debug:
                    print >>sys.stderr, 'Pages: cycle at objid=%r, skipped' % objid
                return
            if 1 <= self.debug:
                print >>sys.stderr, 'Pages: Kids=%r' % tree['Kids']
            ancestors.add(objid)
            try:
                for c in list_value(tree['Kids']):
                    for x in search(c, tree):
                        yield x
            finally:
                # Also runs when the consumer abandons the generator
                # early, keeping the path set consistent.
                ancestors.discard(objid)
        elif tree.get('Type') is LITERAL_PAGE:
            if 1 <= self.debug:
                print >>sys.stderr, 'Page: %r' % tree
            yield (objid, tree)

    if 'Pages' not in self.catalog:
        return
    for (pageid, tree) in search(self.catalog['Pages'], self.catalog):
        yield PDFPage(self, pageid, tree)
    return
520
521	def get_outlines(self):
522	if 'Outlines' not in self.catalog:

process_pdf · Function · 0.95

PDFException · Class · 0.90

search · Function · 0.85

PDFPage · Class · 0.85

no test coverage detected