(
self,
file,
pattern_list: List,
with_filter: bool,
limit: int,
get_buffer,
save_image,
)
| 48 | |
| 49 | class PdfSplitHandle(BaseSplitHandle): |
def handle(
    self,
    file,
    pattern_list: List,
    with_filter: bool,
    limit: int,
    get_buffer,
    save_image,
):
    """Split an uploaded PDF into content segments.

    The upload is copied to a temporary file, opened with ``PdfReader``
    and then processed with the first strategy that yields a result:

    1. ``self.handle_toc`` - used whenever it returns anything other
       than ``None``.
    2. ``self.handle_links`` - used when it returns a non-empty result.
    3. ``self.handle_pdf_content`` followed by ``SplitModel.parse``,
       using ``pattern_list`` or, when that is empty, the module's
       ``default_pattern_list``.

    Args:
        file: Uploaded file object; must provide ``chunks()`` and ``name``.
        pattern_list: Split patterns; ``None`` or empty selects the
            default patterns.
        with_filter: Filter flag forwarded to the splitters. The string
            form (``"true"``/``"false"``, case-insensitive) is accepted.
        limit: Limit forwarded to the splitters. A numeric string is
            accepted and converted with ``int``.
        get_buffer: Not used by this method.
        save_image: Not used by this method.

    Returns:
        ``{"name": file.name, "content": ...}``. ``content`` is an empty
        list when reading or analysing the PDF raises an ``Exception``.
    """
    # delete=False so the file can be reopened by path after it is closed;
    # removal is therefore done explicitly in the ``finally`` below.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file_path = temp_file.name
    try:
        # Persist the upload to disk. A failure here still propagates to
        # the caller, but the temporary file no longer leaks.
        with temp_file:
            for chunk in file.chunks():
                temp_file.write(chunk)

        try:
            with open(temp_file_path, "rb") as pdf_file:
                pdf_document = PdfReader(pdf_file)
                # Parameters may arrive as strings; normalise them first.
                if isinstance(limit, str):
                    limit = int(limit)
                if isinstance(with_filter, str):
                    with_filter = with_filter.lower() == "true"

                # PDF with a table of contents.
                result = self.handle_toc(pdf_document, limit)
                if result is not None:
                    return {"name": file.name, "content": result}

                # No table of contents, but the PDF contains links.
                result = self.handle_links(
                    pdf_document, pattern_list, with_filter, limit
                )
                if result:
                    return {"name": file.name, "content": result}

                # Neither: fall back to the raw PDF content.
                content = self.handle_pdf_content(file, pdf_document)

                if pattern_list:
                    split_model = SplitModel(pattern_list, with_filter, limit)
                else:
                    split_model = SplitModel(
                        default_pattern_list, with_filter=with_filter, limit=limit
                    )
        except Exception as e:
            # Best effort: log and return empty content. ``Exception`` (not
            # ``BaseException``) so KeyboardInterrupt/SystemExit propagate.
            maxkb_logger.error(
                f"File: {file.name}, error: {e}, {traceback.format_exc()}"
            )
            return {"name": file.name, "content": []}
    finally:
        # The temporary file is closed by now; remove it on every path.
        os.remove(temp_file_path)

    return {"name": file.name, "content": split_model.parse(content)}
| 104 | |
| 105 | @staticmethod |
| 106 | def handle_pdf_content(file, pdf_document): |
nothing calls this directly
no test coverage detected