MCPcopy Index your code
hub / github.com/algorithmicsuperintelligence/optillm / make_n_attempts

Function make_n_attempts

scripts/eval_aime_benchmark.py:366–436  ·  view source on GitHub ↗

Make n attempts to solve a problem and return all responses and predictions. Args: problem (str): the problem text; model (str): the model identifier; n (int): number of attempts to make; analyze_thoughts (bool): whether to analyze thinking patterns; analyze_logits (bool): whether to analyze token probabilities.

(problem: str, model: str, n: int, analyze_thoughts: bool = False, analyze_logits: bool = False, extra_body: dict = None)

Source from the content-addressed store, hash-verified

364 raise e # Re-raise instead of silently returning empty string
365
366def make_n_attempts(problem: str, model: str, n: int, analyze_thoughts: bool = False, analyze_logits: bool = False, extra_body: dict = None) -> List[Dict]:
367 """
368 Make n attempts to solve a problem and return all responses and predictions.
369
370 Args:
371 problem (str): The problem text
372 model (str): The model identifier
373 n (int): Number of attempts to make
374 analyze_thoughts (bool): Whether to analyze thinking patterns
375 analyze_logits (bool): Whether to analyze token probabilities
376
377 Returns:
378 List[Dict]: List of dictionaries containing response and predicted answer for each attempt
379 """
380 attempts = []
381 remaining_attempts = n
382
383 while remaining_attempts > 0:
384 try:
385 response = get_llm_response(problem, model, analyze_logits, extra_body)
386 except Exception as e:
387 logger.error(f"Failed to get response for attempt {n - remaining_attempts + 1}: {e}")
388 # Create a failed attempt record
389 attempt_data = {
390 "attempt_number": len(attempts) + 1,
391 "response": f"ERROR: {str(e)}",
392 "predicted_answer": None,
393 "error": str(e)
394 }
395 attempts.append(attempt_data)
396 remaining_attempts -= 1
397 continue
398
399 # If response is already formatted as attempts
400 if isinstance(response, list):
401 for attempt in response:
402 if analyze_thoughts:
403 attempt["thought_analysis"] = analyze_thinking(attempt["response"])
404 if analyze_logits and "logprobs" in attempt:
405 attempt["logit_analysis"] = analyze_logits_probs(attempt["logprobs"]["content"])
406 attempts.extend(response)
407 remaining_attempts = n - len(attempts)
408 elif isinstance(response, dict) and "response" in response:
409 # Process dict response with logprobs
410 response_text = response["response"]
411 predicted_answer = extract_answer(response_text)
412 attempt_data = {
413 "attempt_number": len(attempts) + 1,
414 "response": response_text,
415 "predicted_answer": predicted_answer
416 }
417 if analyze_thoughts:
418 attempt_data["thought_analysis"] = analyze_thinking(response_text)
419 if analyze_logits and "logprobs" in response:
420 attempt_data["logit_analysis"] = analyze_logits_probs(response["logprobs"]["content"])
421 attempts.append(attempt_data)
422 remaining_attempts -= 1
423 else:

Callers 1

main · Function · 0.85

Calls 4

analyze_thinking · Function · 0.85
analyze_logits_probs · Function · 0.85
get_llm_response · Function · 0.70
extract_answer · Function · 0.70

Tested by

no test coverage detected