MCPcopy Index your code
hub / github.com/1Panel-dev/MaxKB / handle

Method handle

apps/common/handle/impl/text/pdf_split_handle.py:50–103  ·  view source on GitHub ↗
(
        self,
        file,
        pattern_list: List,
        with_filter: bool,
        limit: int,
        get_buffer,
        save_image,
    )

Source from the content-addressed store, hash-verified

48
49class PdfSplitHandle(BaseSplitHandle):
def handle(
    self,
    file,
    pattern_list: List,
    with_filter: bool,
    limit: int,
    get_buffer,
    save_image,
):
    """Split an uploaded PDF into content segments.

    The upload is copied to a temporary file, opened with ``PdfReader`` and
    run through three strategies in order; the first that yields a result
    is returned:

    1. ``handle_toc`` -- PDFs that have a table of contents.
    2. ``handle_links`` -- PDFs without a table of contents but with links.
    3. ``handle_pdf_content`` followed by ``SplitModel.parse`` -- fallback.

    Args:
        file: Uploaded file object; must expose ``chunks()`` and ``name``.
        pattern_list: Split patterns. ``default_pattern_list`` is used when
            this is ``None`` or empty.
        with_filter: Forwarded to ``handle_links`` and ``SplitModel``. A
            string is accepted and is true only for "true" (any case).
        limit: Forwarded to ``handle_toc``, ``handle_links`` and
            ``SplitModel``. A string is converted with ``int``.
        get_buffer: Not used by this method.
        save_image: Not used by this method.

    Returns:
        ``{"name": file.name, "content": ...}``. ``content`` is an empty
        list when reading or analysing the PDF raises an ``Exception``
        (the error is logged). Errors while copying the upload to disk and
        errors from the final ``SplitModel.parse`` call propagate.
    """
    # delete=False because the file is reopened by path below; it is removed
    # explicitly in the ``finally`` clause instead.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file_path = temp_file.name
    try:
        # Save the uploaded file into the temporary file. This sits inside
        # the try/finally so a failed upload cannot leak the file on disk.
        with temp_file:
            for chunk in file.chunks():
                temp_file.write(chunk)

        try:
            # The reader is given an open stream, so every step that touches
            # ``pdf_document`` stays inside this ``with`` block.
            with open(temp_file_path, "rb") as pdf_file:
                pdf_document = PdfReader(pdf_file)
                if isinstance(limit, str):
                    limit = int(limit)
                if isinstance(with_filter, str):
                    with_filter = with_filter.lower() == "true"

                # PDF with a table of contents.
                result = self.handle_toc(pdf_document, limit)
                if result is not None:
                    return {"name": file.name, "content": result}

                # PDF without a table of contents but with links.
                result = self.handle_links(
                    pdf_document, pattern_list, with_filter, limit
                )
                if result is not None and len(result) > 0:
                    return {"name": file.name, "content": result}

                # PDF without a table of contents.
                content = self.handle_pdf_content(file, pdf_document)

            if pattern_list is not None and len(pattern_list) > 0:
                split_model = SplitModel(pattern_list, with_filter, limit)
            else:
                split_model = SplitModel(
                    default_pattern_list, with_filter=with_filter, limit=limit
                )
        except Exception as e:
            # Best effort: a broken PDF yields an empty result rather than an
            # error. ``Exception`` (not ``BaseException``) so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            maxkb_logger.error(
                f"File: {file.name}, error: {e}, {traceback.format_exc()}"
            )
            return {"name": file.name, "content": []}
    finally:
        # Remove the temporary file once processing is done (also runs on
        # the early returns above).
        os.remove(temp_file_path)

    return {"name": file.name, "content": split_model.parse(content)}
104
105 @staticmethod
106 def handle_pdf_content(file, pdf_document):

Callers

nothing calls this directly

Calls 8

handle_toc — Method · 0.95
handle_links — Method · 0.95
handle_pdf_content — Method · 0.95
parse — Method · 0.95
SplitModel — Class · 0.90
open — Function · 0.50
write — Method · 0.45
error — Method · 0.45

Tested by

no test coverage detected