Makes a request to the gemini-2.5-pro-preview-03-25 model with retry functionality. Args: prompt (str): The text prompt to send to the model; log_id (str, optional): The log ID for tracking requests, defaults to tkb+timestamp; max_tokens (int, optional): Maximum tokens for response, default 8000; max_retries (int, optional): Maximum number of retry attempts, default 3.
(prompt, log_id=None, max_tokens=8000, max_retries=3)
| 418 | |
| 419 | |
def request_gemini_token(prompt, log_id=None, max_tokens=8000, max_retries=3):
    """
    Send a single-turn chat request to the configured Gemini model, retrying on failure.

    The endpoint, API version, API key and model name are read from the
    "gemini" section of the configuration through ``cfg``.

    Args:
        prompt (str): The text prompt, sent as a single user message.
        log_id (str, optional): Log ID sent in the ``X-TT-LOGID`` header.
            When None, one is obtained from ``generate_log_id()``.
        max_tokens (int, optional): Maximum tokens for the response, default 8000.
        max_retries (int, optional): Maximum number of attempts in total,
            default 3. Must be at least 1.

    Returns:
        tuple: ``(completion, usage_info)`` where ``completion`` is the object
        returned by the client and ``usage_info`` is a dict with the keys
        ``prompt_tokens``, ``completion_tokens`` and ``total_tokens`` (all 0
        when the response carries no usage data).

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        Exception: If every attempt fails; the last error is attached as
            ``__cause__``.
    """
    # Local import: the backoff below needs to actually wait between attempts.
    import time

    # Without this guard the retry loop would be skipped entirely and the
    # function would silently return None instead of the documented tuple.
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    base_url = cfg("gemini", "base_url")
    api_version = cfg("gemini", "api_version")
    api_key = cfg("gemini", "api_key")
    model_name = cfg("gemini", "model")

    client = openai.AzureOpenAI(
        azure_endpoint=base_url,
        api_version=api_version,
        api_key=api_key,
    )

    if log_id is None:
        log_id = generate_log_id()

    extra_headers = {"X-TT-LOGID": log_id}

    usage_info = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

    for retry_count in range(1, max_retries + 1):
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                extra_headers=extra_headers,
            )
        except Exception as e:
            if retry_count >= max_retries:
                # Chain the cause so the original traceback is preserved.
                raise Exception(f"Failed after {max_retries} attempts. Last error: {str(e)}") from e

            # Exponential backoff with jitter
            delay = (2**retry_count) * 0.1 + (random.random() * 0.1)
            print(
                f"Request failed with error: {str(e)}. Retrying in {delay:.2f} seconds... (Attempt {retry_count}/{max_retries})"
            )
            # Honour the announced delay; without it the retries fire back-to-back.
            time.sleep(delay)
        else:
            if completion.usage:
                usage_info["prompt_tokens"] = completion.usage.prompt_tokens
                usage_info["completion_tokens"] = completion.usage.completion_tokens
                usage_info["total_tokens"] = completion.usage.total_tokens
            return completion, usage_info
nothing calls this directly
no test coverage detected