MCPcopy Index your code
hub / github.com/PDFMathTranslate/PDFMathTranslate / translate_stream

Function translate_stream

pdf2zh/high_level.py:169–250  ·  view source on GitHub ↗
(
    stream: bytes,
    pages: Optional[list[int]] = None,
    lang_in: str = "",
    lang_out: str = "",
    service: str = "",
    thread: int = 0,
    vfont: str = "",
    vchar: str = "",
    callback: object = None,
    cancellation_event: asyncio.Event = None,
    model: OnnxModel = None,
    envs: Dict = None,
    prompt: Template = None,
    skip_subset_fonts: bool = False,
    ignore_cache: bool = False,
    **kwarg: Any,
)

Source from the content-addressed store, hash-verified

167
168
169def translate_stream(
170 stream: bytes,
171 pages: Optional[list[int]] = None,
172 lang_in: str = "",
173 lang_out: str = "",
174 service: str = "",
175 thread: int = 0,
176 vfont: str = "",
177 vchar: str = "",
178 callback: object = None,
179 cancellation_event: asyncio.Event = None,
180 model: OnnxModel = None,
181 envs: Dict = None,
182 prompt: Template = None,
183 skip_subset_fonts: bool = False,
184 ignore_cache: bool = False,
185 **kwarg: Any,
186):
187 font_list = [("tiro", None)]
188
189 font_path = download_remote_fonts(lang_out.lower())
190 noto_name = NOTO_NAME
191 noto = Font(noto_name, font_path)
192 font_list.append((noto_name, font_path))
193
194 doc_en = Document(stream=stream)
195 stream = io.BytesIO()
196 doc_en.save(stream)
197 doc_zh = Document(stream=stream)
198 page_count = doc_zh.page_count
199 # font_list = [("GoNotoKurrent-Regular.ttf", font_path), ("tiro", None)]
200 font_id = {}
201 for page in doc_zh:
202 for font in font_list:
203 font_id[font[0]] = page.insert_font(font[0], font[1])
204 xreflen = doc_zh.xref_length()
205 for xref in range(1, xreflen):
206 for label in ["Resources/", ""]: # 可能是基于 xobj 的 res
207 try: # xref 读写可能出错
208 font_res = doc_zh.xref_get_key(xref, f"{label}Font")
209 target_key_prefix = f"{label}Font/"
210 if font_res[0] == "xref":
211 resource_xref_id = re.search("(\\d+) 0 R", font_res[1]).group(1)
212 xref = int(resource_xref_id)
213 font_res = ("dict", doc_zh.xref_object(xref))
214 target_key_prefix = ""
215
216 if font_res[0] == "dict":
217 for font in font_list:
218 target_key = f"{target_key_prefix}{font[0]}"
219 font_exist = doc_zh.xref_get_key(xref, target_key)
220 if font_exist[0] == "null":
221 doc_zh.xref_set_key(
222 xref,
223 target_key,
224 f"{font_id[font[0]]} 0 R",
225 )
226 except Exception:

Callers 3

translate_task — Function · 0.90
translate_pdf — Function · 0.90
translate — Function · 0.85

Calls 2

download_remote_fonts — Function · 0.85
translate_patch — Function · 0.85

Tested by

no test coverage detected