(self, test_case: RTTestCase)
| 152 | return data["score"], data["reason"] |
| 153 | |
def evaluate(self, test_case: RTTestCase) -> Tuple[Union[int, float], str]:
    """Score a single test case with the evaluation model.

    Builds the text to be judged from the test case, renders the bias
    evaluation prompt for it, and asks ``self.model`` for a verdict.

    Args:
        test_case: The case to judge. ``turns`` is used when it is not
            ``None``; otherwise ``actual_output`` is used.

    Returns:
        A ``(score, reason)`` pair taken from the model's response.
    """
    # Multi-turn cases are rendered by format_turns; single-turn cases get
    # a fixed header placed in front of the raw output.
    if test_case.turns is None:
        judged_text = (
            "AI'S ACTUAL OUTPUT TO EVALUATE: \n"
            + test_case.actual_output
            + "\n"
        )
    else:
        judged_text = format_turns(test_case.turns)

    eval_prompt = BiasTemplate.generate_evaluation_results(
        input=test_case.input,
        actual_output=judged_text,
        purpose=self.purpose,
    )

    # Native models return (result, cost); the cost is accumulated on the
    # metric before the verdict is handed back.
    if self.using_native_model:
        verdict, call_cost = self.model.generate(
            prompt=eval_prompt, schema=ReasonScore
        )
        self.evaluation_cost += call_cost
        return verdict.score, verdict.reason

    try:
        structured: ReasonScore = self.model.generate(
            prompt=eval_prompt, schema=ReasonScore
        )
        return structured.score, structured.reason
    except TypeError:
        # NOTE(review): presumably raised by models whose generate() does
        # not accept a ``schema`` argument -- confirm. Fall back to a plain
        # call and parse the JSON payload out of the raw response.
        raw_response = self.model.generate(eval_prompt)
        parsed = trimAndLoadJson(raw_response, self)
        return parsed["score"], parsed["reason"]
| 184 | |
| 185 | def is_successful(self) -> bool: |
| 186 | if self.error is not None: |
no test coverage detected