Reward function for Countdown Tasks. Total reward = 0.1 * format_reward + answer_reward
(
response: str,
numbers: List[int] = None,
target: int = None,
end_token: str = None,
)
| 146 | |
| 147 | |
def reward_function(
    response: str,
    numbers: List[int] = None,
    target: int = None,
    end_token: str = None,
) -> Dict[str, Any]:
    """Score a Countdown response by combining format and answer rewards.

    Total reward = 0.1 * format_reward + answer_reward

    Args:
        response: Text to score.
        numbers: Forwarded unchanged to ``answer_reward_function``.
        target: Forwarded unchanged to ``answer_reward_function``.
        end_token: Forwarded unchanged to ``format_reward_function``.

    Returns:
        A dict with the combined score under ``"reward"`` and the two
        component scores under ``"reward_info"``.
    """
    # The format check runs on the response with "<think>" prepended.
    # NOTE(review): presumably the opening tag lives in the prompt rather
    # than in the response itself -- confirm against the caller.
    fmt_score = format_reward_function("<think>" + response, end_token)
    ans_score = answer_reward_function(response, numbers, target)

    # Format counts for a tenth of its raw value; the answer counts in full.
    total = fmt_score * 0.1 + ans_score
    breakdown = {
        "format_reward": fmt_score,
        "answer_reward": ans_score,
    }
    return {"reward": total, "reward_info": breakdown}
no test coverage detected