Save raw response data (including logprobs) to a separate file.
(filename: str, problem_id: int, response_data: Dict)
| 794 | print("---") |
| 795 | |
def save_raw_response(filename: str, problem_id: int, response_data: Dict) -> str:
    """Save raw response data (including logprobs) to a separate file.

    The file at ``filename`` holds a single JSON object mapping response IDs
    to response payloads. Each call loads that object (starting fresh if the
    file is missing or unreadable as a JSON object), adds one entry, and
    writes the whole collection back.

    Args:
        filename: Path of the JSON file to create or update. May be a bare
            file name with no directory component.
        problem_id: Identifier of the problem; used as the ID prefix.
        response_data: JSON-serializable payload to store.

    Returns:
        The ID under which the payload was stored, normally
        ``"{problem_id}_{unix_seconds}"``. If that key already exists in the
        file, a numeric suffix (``_1``, ``_2``, ...) is appended so an
        earlier response is never overwritten.

    Raises:
        TypeError: If ``response_data`` is not JSON-serializable. The
            existing file is left untouched in that case.
        OSError: If the directory or file cannot be written.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the path actually has one.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Load the existing collection, or start a new one.
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            raw_responses = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        raw_responses = {}
    if not isinstance(raw_responses, dict):
        # Valid JSON but not an object: treat it like an unreadable file
        # rather than failing on the item assignment below.
        raw_responses = {}

    # Create a timestamped ID for this response. The timestamp only has
    # second resolution, so disambiguate repeated saves for the same problem
    # within one second instead of silently replacing the earlier entry.
    timestamp = int(time.time())
    base_id = f"{problem_id}_{timestamp}"
    response_id = base_id
    suffix = 1
    while response_id in raw_responses:
        response_id = f"{base_id}_{suffix}"
        suffix += 1

    # Add this response to the collection
    raw_responses[response_id] = response_data

    # Write to a sibling temp file and swap it into place, so a failure
    # part-way through serialization cannot truncate the existing file.
    tmp_path = f"{filename}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(raw_responses, f)
        os.replace(tmp_path, filename)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error
        # is what the caller needs to see.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return response_id
| 819 | |
| 820 | def main(model: str, n_attempts: int, year: int = 2024, analyze_thoughts: bool = False, analyze_logits: bool = False, test_time_compute: bool = False, approach_name: str = None, extra_body: dict = None): |
| 821 | """Main evaluation function that handles gaps in processed indexes.""" |
no test coverage detected