Makes a multimodal request to the Gemini-2.5 model using video + text. Args: prompt (str): The user instruction, e.g., "Please evaluate and suggest improvements for this educational animation." video_path (str): Local path to the video file (MP4 preferred, <20MB recommended).
(prompt: str, video_path: str, log_id=None, max_tokens: int = 10000, max_retries: int = 3)
| 122 | |
| 123 | |
| 124 | def request_gemini_with_video(prompt: str, video_path: str, log_id=None, max_tokens: int = 10000, max_retries: int = 3): |
| 125 | """ |
| 126 | Makes a multimodal request to the Gemini-2.5 model using video + text. |
| 127 | |
| 128 | Args: |
| 129 | prompt (str): The user instruction, e.g., "Please evaluate and suggest improvements for this educational animation." |
| 130 | video_path (str): Local path to the video file (MP4 preferred, <20MB recommended). |
| 131 | log_id (str, optional): Tracking ID |
| 132 | max_tokens (int): Max response token length |
| 133 | max_retries (int): Max retry attempts |
| 134 | |
| 135 | Returns: |
| 136 | dict: The Gemini model response |
| 137 | """ |
| 138 | base_url = cfg("gemini", "base_url") |
| 139 | api_version = cfg("gemini", "api_version") |
| 140 | api_key = cfg("gemini", "api_key") |
| 141 | model_name = cfg("gemini", "model") |
| 142 | |
| 143 | client = openai.AzureOpenAI( |
| 144 | azure_endpoint=base_url, |
| 145 | api_version=api_version, |
| 146 | api_key=api_key, |
| 147 | ) |
| 148 | |
| 149 | if log_id is None: |
| 150 | log_id = generate_log_id() |
| 151 | |
| 152 | extra_headers = {"X-TT-LOGID": log_id} |
| 153 | |
| 154 | # Load and base64-encode video |
| 155 | if not os.path.exists(video_path): |
| 156 | raise FileNotFoundError(f"Video not found: {video_path}") |
| 157 | |
| 158 | with open(video_path, "rb") as f: |
| 159 | video_bytes = f.read() |
| 160 | |
| 161 | video_base64 = base64.b64encode(video_bytes).decode("utf-8") |
| 162 | data_url = f"data:video/mp4;base64,{video_base64}" |
| 163 | |
| 164 | retry_count = 0 |
| 165 | while retry_count < max_retries: |
| 166 | try: |
| 167 | completion = client.chat.completions.create( |
| 168 | model=model_name, |
| 169 | messages=[ |
| 170 | { |
| 171 | "role": "user", |
| 172 | "content": [ |
| 173 | {"type": "text", "text": prompt}, |
| 174 | {"type": "image_url", "image_url": {"url": data_url, "detail": "high"}, "media_type": "video/mp4"}, |
| 175 | ], |
| 176 | } |
| 177 | ], |
| 178 | max_tokens=max_tokens, |
| 179 | extra_headers=extra_headers, |
| 180 | ) |
| 181 | return completion |
no test coverage detected