Make n attempts to solve a problem and return all responses and predictions. Args: problem (str): The problem text; model (str): The model identifier; n (int): Number of attempts to make; analyze_thoughts (bool): Whether to analyze thinking patterns.
(problem: str, model: str, n: int, analyze_thoughts: bool = False, analyze_logits: bool = False, extra_body: dict = None)
| 364 | raise e # Re-raise instead of silently returning empty string |
| 365 | |
| 366 | def make_n_attempts(problem: str, model: str, n: int, analyze_thoughts: bool = False, analyze_logits: bool = False, extra_body: dict = None) -> List[Dict]: |
| 367 | """ |
| 368 | Make n attempts to solve a problem and return all responses and predictions. |
| 369 | |
| 370 | Args: |
| 371 | problem (str): The problem text |
| 372 | model (str): The model identifier |
| 373 | n (int): Number of attempts to make |
| 374 | analyze_thoughts (bool): Whether to analyze thinking patterns |
| 375 | analyze_logits (bool): Whether to analyze token probabilities |
| 376 | |
| 377 | Returns: |
| 378 | List[Dict]: List of dictionaries containing response and predicted answer for each attempt |
| 379 | """ |
| 380 | attempts = [] |
| 381 | remaining_attempts = n |
| 382 | |
| 383 | while remaining_attempts > 0: |
| 384 | try: |
| 385 | response = get_llm_response(problem, model, analyze_logits, extra_body) |
| 386 | except Exception as e: |
| 387 | logger.error(f"Failed to get response for attempt {n - remaining_attempts + 1}: {e}") |
| 388 | # Create a failed attempt record |
| 389 | attempt_data = { |
| 390 | "attempt_number": len(attempts) + 1, |
| 391 | "response": f"ERROR: {str(e)}", |
| 392 | "predicted_answer": None, |
| 393 | "error": str(e) |
| 394 | } |
| 395 | attempts.append(attempt_data) |
| 396 | remaining_attempts -= 1 |
| 397 | continue |
| 398 | |
| 399 | # If response is already formatted as attempts |
| 400 | if isinstance(response, list): |
| 401 | for attempt in response: |
| 402 | if analyze_thoughts: |
| 403 | attempt["thought_analysis"] = analyze_thinking(attempt["response"]) |
| 404 | if analyze_logits and "logprobs" in attempt: |
| 405 | attempt["logit_analysis"] = analyze_logits_probs(attempt["logprobs"]["content"]) |
| 406 | attempts.extend(response) |
| 407 | remaining_attempts = n - len(attempts) |
| 408 | elif isinstance(response, dict) and "response" in response: |
| 409 | # Process dict response with logprobs |
| 410 | response_text = response["response"] |
| 411 | predicted_answer = extract_answer(response_text) |
| 412 | attempt_data = { |
| 413 | "attempt_number": len(attempts) + 1, |
| 414 | "response": response_text, |
| 415 | "predicted_answer": predicted_answer |
| 416 | } |
| 417 | if analyze_thoughts: |
| 418 | attempt_data["thought_analysis"] = analyze_thinking(response_text) |
| 419 | if analyze_logits and "logprobs" in response: |
| 420 | attempt_data["logit_analysis"] = analyze_logits_probs(response["logprobs"]["content"]) |
| 421 | attempts.append(attempt_data) |
| 422 | remaining_attempts -= 1 |
| 423 | else: |
no test coverage detected