Compile user-provided code into a reward function. The code should be the body of a function that receives `completions` (list[str]) and `**kwargs`, and returns list[float]. Available modules: re, math, json, string.
(name, code)
| 131 | |
| 132 | |
def compile_inline_reward(name, code):
    """Compile user-provided code into a reward function.

    The code should be the body of a function that receives
    `completions` (list[str]) and `**kwargs`, and returns list[float].

    Available modules: re, math, json, string.

    Args:
        name: Label for the reward. It is used in error messages, in the
            pseudo-filename passed to ``compile`` and (after sanitising) as
            the suffix of the generated function's name.
        code: Source text of the function body. Every line is indented by
            four spaces and placed under a generated ``def`` header.

    Returns:
        The compiled function, callable as ``func(completions, **kwargs)``.

    Raises:
        ValueError: If the generated source does not compile, or if the
            smoke-test call succeeds but returns something that is not a
            list.
    """
    # `name` ends up inside generated source code, so it must never contain
    # anything but identifier characters: otherwise a name such as
    # "my-reward" is reported as a syntax error in the *user's* code, and a
    # crafted name could inject statements outside the function body.
    # ASCII-only matching also avoids identifiers that Python would
    # NFKC-normalise, which would make the lookup below miss.
    safe_name = re.sub(r"\W", "_", str(name), flags=re.ASCII)
    func_name = f"_user_reward_{safe_name}"

    body = "\n".join(f"    {line}" for line in code.splitlines())
    func_source = f"def {func_name}(completions, **kwargs):\n{body}"

    restricted_globals = {
        "__builtins__": _SAFE_BUILTINS,
        "re": re,
        "math": math,
        "json": json,
        "string": string,
    }

    try:
        compiled = compile(func_source, f"<inline-reward-{name}>", "exec")
    except SyntaxError as e:
        raise ValueError(
            f"Syntax error in inline reward function '{name}': {e}"
        ) from e

    # NOTE(security): this executes user-provided code. Restricting
    # `__builtins__` limits what is directly reachable but is not a real
    # sandbox; only use this with input you are prepared to run in-process.
    exec(compiled, restricted_globals)
    func = restricted_globals[func_name]

    # Validate with a quick smoke test. Only the call itself is guarded:
    # errors raised by the user code are acceptable here (e.g. it may rely on
    # kwargs the smoke test does not supply), so they are deliberately
    # ignored rather than being told apart by matching on the message text.
    try:
        result = func(["test"], answer=["test"])
    except Exception:
        return func

    if not isinstance(result, list):
        raise ValueError(
            f"Inline reward function '{name}' must return a list, got {type(result).__name__}"
        )

    return func
| 176 | |
| 177 | |
| 178 | # --------------------------------------------------------------------------- |
no outgoing calls
no test coverage detected