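Get response from the LLM for a given problem. If multiple choices are returned, formats them as attempt dictionaries. Args: problem (str): The problem text; model (str): The model identifier; analyze_logits (bool): Whether to request logprobs; extra_body (dict, optional): Extra request fields passed through to the completion call when provided. Returns: Union[str, List[Dict]]: Either a string response or list of attempt dictionaries.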
(problem: str, model: str, analyze_logits: bool = False, extra_body: dict = None)
| 286 | } |
| 287 | |
| 288 | def get_llm_response(problem: str, model: str, analyze_logits: bool = False, extra_body: dict = None) -> Union[str, List[Dict]]: |
| 289 | """ |
| 290 | Get response from the LLM for a given problem. |
| 291 | If multiple choices are returned, formats them as attempt dictionaries. |
| 292 | |
| 293 | Args: |
| 294 | problem (str): The problem text |
| 295 | model (str): The model identifier |
| 296 | analyze_logits (bool): Whether to request logprobs |
| 297 | |
| 298 | Returns: |
| 299 | Union[str, List[Dict]]: Either a string response or list of attempt dictionaries |
| 300 | """ |
| 301 | try: |
| 302 | # Add logprobs parameters if requested |
| 303 | kwargs = {} |
| 304 | if analyze_logits: |
| 305 | kwargs["logprobs"] = True |
| 306 | kwargs["top_logprobs"] = 3 |
| 307 | |
| 308 | # Add extra_body if provided |
| 309 | if extra_body: |
| 310 | kwargs["extra_body"] = extra_body |
| 311 | |
| 312 | response = client.with_options(timeout=6000.0).chat.completions.create( |
| 313 | model=model, |
| 314 | messages=[ |
| 315 | {"role": "user", "content": SYSTEM_PROMPT + problem} |
| 316 | ], |
| 317 | max_tokens=64000, |
| 318 | **kwargs |
| 319 | ) |
| 320 | |
| 321 | # Save raw response if logprobs are requested |
| 322 | if analyze_logits: |
| 323 | raw_filename = f"results/raw_responses_{model.replace('/', '_')}.json" |
| 324 | problem_id = hash(problem) % 10000 # Simple hash to identify the problem |
| 325 | save_raw_response(raw_filename, problem_id, response.model_dump()) |
| 326 | |
| 327 | # If there's more than one choice, format as attempts |
| 328 | if len(response.choices) > 1: |
| 329 | attempts = [] |
| 330 | for i, choice in enumerate(response.choices): |
| 331 | response_text = choice.message.content.strip() |
| 332 | predicted_answer = extract_answer(response_text) |
| 333 | attempt_data = { |
| 334 | "attempt_number": i + 1, |
| 335 | "response": response_text, |
| 336 | "predicted_answer": predicted_answer |
| 337 | } |
| 338 | |
| 339 | # Add logprobs if available |
| 340 | if analyze_logits and hasattr(choice.message, 'logprobs') and choice.message.logprobs: |
| 341 | attempt_data["logprobs"] = choice.message.logprobs |
| 342 | |
| 343 | attempts.append(attempt_data) |
| 344 | return attempts |
| 345 |
no test coverage detected