MCPcopy
hub / github.com/binary-husky/gpt_academic / pdf2tex_project

Function pdf2tex_project

crazy_functions/Latex_Function.py:181–244  ·  view source on GitHub ↗
(pdf_file_path, plugin_kwargs)

Source from the content-addressed store, hash-verified

179
180
181def pdf2tex_project(pdf_file_path, plugin_kwargs):
182 if plugin_kwargs["method"] == "MATHPIX":
183 # Mathpix API credentials
184 app_id, app_key = get_conf('MATHPIX_APPID', 'MATHPIX_APPKEY')
185 headers = {"app_id": app_id, "app_key": app_key}
186
187 # Step 1: Send PDF file for processing
188 options = {
189 "conversion_formats": {"tex.zip": True},
190 "math_inline_delimiters": ["$", "$"],
191 "rm_spaces": True
192 }
193
194 response = requests.post(url="https://api.mathpix.com/v3/pdf",
195 headers=headers,
196 data={"options_json": json.dumps(options)},
197 files={"file": open(pdf_file_path, "rb")})
198
199 if response.ok:
200 pdf_id = response.json()["pdf_id"]
201 logger.info(f"PDF processing initiated. PDF ID: {pdf_id}")
202
203 # Step 2: Check processing status
204 while True:
205 conversion_response = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}", headers=headers)
206 conversion_data = conversion_response.json()
207
208 if conversion_data["status"] == "completed":
209 logger.info("PDF processing completed.")
210 break
211 elif conversion_data["status"] == "error":
212 logger.info("Error occurred during processing.")
213 else:
214 logger.info(f"Processing status: {conversion_data['status']}")
215 time.sleep(5) # wait for a few seconds before checking again
216
217 # Step 3: Save results to local files
218 output_dir = os.path.join(os.path.dirname(pdf_file_path), 'mathpix_output')
219 if not os.path.exists(output_dir):
220 os.makedirs(output_dir)
221
222 url = f"https://api.mathpix.com/v3/pdf/{pdf_id}.tex"
223 response = requests.get(url, headers=headers)
224 file_name_wo_dot = '_'.join(os.path.basename(pdf_file_path).split('.')[:-1])
225 output_name = f"{file_name_wo_dot}.tex.zip"
226 output_path = os.path.join(output_dir, output_name)
227 with open(output_path, "wb") as output_file:
228 output_file.write(response.content)
229 logger.info(f"tex.zip file saved at: {output_path}")
230
231 import zipfile
232 unzip_dir = os.path.join(output_dir, file_name_wo_dot)
233 with zipfile.ZipFile(output_path, 'r') as zip_ref:
234 zip_ref.extractall(unzip_dir)
235
236 return unzip_dir
237
238 else:

Callers 1

Calls 4

解析PDF_DOC2X_转Latex (Function) · 0.90
get_conf (Function) · 0.85
get (Method) · 0.80
split (Method) · 0.80

Tested by

no test coverage detected