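Extract table content from pdf pages. Args: pdf_file (str): PDF filename to read from. password (str): Password for encrypted pdf. Defaults to None if not encrypted. start (int, optional): First page to process. Defaults to 0. end (int, optional): Last page to process. Defaults to None. pages (list, optional): Range of pages, e.g. --pages=1,3,5. Defaults to None.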
(pdf_file, password:str=None, start:int=0, end:int=None, pages:list=None, **kwargs)
| 80 | |
@staticmethod
def table(pdf_file, password:str=None, start:int=0, end:int=None, pages:list=None, **kwargs):
    '''Extract tables from the selected pages of a PDF file.

    Args:
        pdf_file (str): PDF filename to read from.
        password (str): Password for encrypted pdf. Defaults to None if not encrypted.
        start (int, optional): First page to process. Defaults to 0.
        end (int, optional): Last page to process. Defaults to None.
        pages (list, optional): Range of pages, e.g. --pages=1,3,5. A single
            int is accepted too and is wrapped into a list. Defaults to None.
        **kwargs: Forwarded unchanged to ``Converter.extract_tables``. The
            ``zero_based_index`` key (treated as True when absent) is also
            read here: when it is falsy, ``start``, ``end`` and ``pages`` are
            taken as one-based and shifted down by one.

    Returns:
        The result of ``Converter.extract_tables``, or an empty list when that
        call raised an exception (the exception is logged, not re-raised).
    '''
    # a lone page number (in case --pages=1) becomes a one-element list
    if isinstance(pages, int):
        pages = [pages]

    # convert one-based page numbers to the zero-based values passed on below
    one_based = not kwargs.get('zero_based_index', True)
    if one_based:
        start = max(start - 1, 0)
        if end:
            end -= 1
        if pages:
            pages = [page - 1 for page in pages]

    converter = Converter(pdf_file, password)
    tables = []  # stays empty if extraction fails
    try:
        tables = converter.extract_tables(start, end, pages, **kwargs)
    except Exception as err:
        # best effort: report the failure and fall through with no tables
        logging.error(err)
    finally:
        # always release the converter, whether or not extraction succeeded
        converter.close()

    return tables
| 109 | |
| 110 | |
| 111 | @staticmethod |
nothing calls this directly
no test coverage detected