Rates a conversation based on rubric criteria. Args: messages: A list of conversation messages between user and agent. Returns: A dictionary containing rating information including score.
(self, messages: list[dict[str, Any]])
| 159 | |
@retry(tries=3, delay=2, backoff=2)
def __call__(self, messages: list[dict[str, Any]]) -> dict[str, Any]:
    """Rates a conversation based on rubric criteria.

    Renders the rubric validation template with the conversation and the
    rubric, sends the rendered prompt to the model, and converts the parsed
    response into a rating dictionary.

    Args:
      messages: A list of conversation messages between user and agent. The
        text of the first part of the first message is exposed to the
        template as `user_input`; an empty list, or a first message without
        usable text, yields an empty string.

    Returns:
      A dictionary built from the parsed model response, with a `score` key
      (1.0 when the parsed `verdict` equals 'yes', otherwise 0.0) and the
      parsed `property` entry renamed to `rating_criteria`.

    Raises:
      KeyError: If the parsed response has no `verdict` or `property` entry.
    """
    user_input = ''
    if messages:
        # `.get('parts', [{}])` only falls back when the key is absent, so an
        # explicit empty list (or None) used to crash on the `[0]` lookup.
        # `or` covers the missing, empty and None cases alike.
        first_parts = messages[0].get('parts') or [{}]
        # Likewise fall back to '' when `text` is present but None, so the
        # template receives a string rather than None.
        user_input = first_parts[0].get('text') or ''

    # Template variables are supplied as environment globals, so the
    # template is rendered without explicit arguments.
    env = jinja2.Environment()
    env.globals.update({
        'user_input': user_input,
        'developer_instructions': self._developer_instructions,
        'tool_declarations': self._tool_declarations,
        'model_response': format_user_agent_conversation(messages),
        # The rubric is presented to the template as a single bullet item.
        'decomposed_rubric': '* ' + self._rubric,
    })
    prompt = env.from_string(self._rubric_validation_template).render()

    # NOTE(review): thinking_budget=-1 presumably requests a dynamic thinking
    # budget -- confirm against the Gemini API documentation.
    generation_config = types.GenerateContentConfig(
        candidate_count=1,
        thinking_config=types.ThinkingConfig(
            include_thoughts=True, thinking_budget=-1
        ),
    )
    response = self._client.models.generate_content(
        model='gemini-2.5-pro',
        contents=prompt,
        config=generation_config,
    )

    # Copy into a plain dict before adding and renaming keys, so the object
    # returned by the parser is left untouched.
    rating = dict(parse_rubric_validation_response(response.text))
    rating['score'] = float(rating['verdict'] == 'yes')
    rating['rating_criteria'] = rating.pop('property')
    return rating
nothing calls this directly
no test coverage detected