Download and parse the ProofBench CSV dataset
()
| 78 | |
| 79 | |
def download_proofbench() -> pd.DataFrame:
    """
    Download and parse the ProofBench CSV dataset.

    Fetches ``PROOFBENCH_URL`` with a 30-second timeout and parses the
    response body as CSV entirely in memory.

    Returns:
        The parsed CSV as a DataFrame.

    Raises:
        Exception: Any failure of the HTTP request (including a non-success
            status reported by ``raise_for_status``) or of CSV parsing is
            logged and then re-raised unchanged.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import io

    logger.info("Downloading ProofBench dataset...")
    try:
        response = requests.get(PROOFBENCH_URL, timeout=30)
        response.raise_for_status()

        # Parse directly from the downloaded bytes. Going through a fixed
        # "/tmp/proofbench.csv" path is not portable, lets concurrent runs
        # overwrite each other's data, and leaves a stale file behind.
        df = pd.read_csv(io.BytesIO(response.content))
        logger.info(f"Loaded {len(df)} problems from ProofBench")
        return df

    except Exception as e:
        # Log for the operator, then let the caller decide how to handle it.
        logger.error(f"Error downloading ProofBench: {e}")
        raise
| 101 | |
| 102 | |
| 103 | def verify_proof(problem: str, solution: str, grading_guidelines: str, model: str, client: OpenAI) -> Dict: |