MCPcopy
hub / github.com/mudler/LocalAI / build_reward_functions

Function build_reward_functions

backend/python/trl/reward_functions.py:182–236  ·  view source on GitHub ↗

Parse a JSON list of reward function specs and return a list of callables. Each spec is a dict with the following keys: `type` ("builtin" or "inline"); `name` (the function name); `code` (inline only: the Python function body); and `params` (optional: a dict of string params applied via `functools.partial`).

(specs_json)

Source from the content-addressed store, hash-verified

180# ---------------------------------------------------------------------------
181
def _coerce_param_value(value):
    """Convert a string param to ``int`` or ``float`` when it parses as one.

    Non-string values (numbers, booleans, lists, ...) are returned unchanged:
    passing them through ``int()`` would silently truncate floats such as
    ``0.5`` to ``0`` and turn booleans into ``1``/``0``.
    """
    if not isinstance(value, str):
        return value
    # Try the narrowest numeric type first so "3" stays an int, not 3.0.
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            continue
    return value


def build_reward_functions(specs_json):
    """Parse a JSON list of reward function specs and return a list of callables.

    Each spec is a dict with:
      - type: "builtin" or "inline" (defaults to "builtin")
      - name: function name
      - code: (inline only) Python function body
      - params: (builtin only, optional) dict of params applied via
        functools.partial; string values that look numeric are converted
        to int or float, all other values are passed through as-is

    Args:
        specs_json: Either a JSON string encoding the list of specs, or the
            already-decoded list.

    Returns:
        A list of callables, one per spec, in the same order as the specs.

    Raises:
        ValueError: If the input is not a list, a spec is not a dict, a
            builtin name is unknown, ``params`` is not a dict, an inline spec
            has no code, or the spec type is unrecognised. Invalid JSON text
            also surfaces as ValueError (json.JSONDecodeError subclasses it).
    """
    specs = json.loads(specs_json) if isinstance(specs_json, str) else specs_json

    if not isinstance(specs, list):
        raise ValueError("reward_funcs must be a JSON array of reward function specs")

    reward_funcs = []
    for spec in specs:
        # Reject malformed entries up front instead of failing on .get() below.
        if not isinstance(spec, dict):
            raise ValueError(
                "Each reward function spec must be a JSON object, "
                f"got {type(spec).__name__}"
            )

        spec_type = spec.get("type", "builtin")
        name = spec.get("name", "")

        if spec_type == "builtin":
            if name not in BUILTIN_REGISTRY:
                available = ", ".join(sorted(BUILTIN_REGISTRY.keys()))
                raise ValueError(
                    f"Unknown builtin reward function '{name}'. Available: {available}"
                )
            func = BUILTIN_REGISTRY[name]

            # A JSON null (or any other falsy value) means "no params".
            params = spec.get("params") or {}
            if not isinstance(params, dict):
                raise ValueError(
                    f"params for reward function '{name}' must be a JSON object"
                )
            if params:
                typed_params = {
                    key: _coerce_param_value(value) for key, value in params.items()
                }
                func = functools.partial(func, **typed_params)
            reward_funcs.append(func)

        elif spec_type == "inline":
            # A JSON null or non-string code is treated the same as missing code.
            code = spec.get("code") or ""
            if not isinstance(code, str) or not code.strip():
                raise ValueError(f"Inline reward function '{name}' has no code")
            reward_funcs.append(compile_inline_reward(name, code))

        else:
            raise ValueError(
                f"Unknown reward function type '{spec_type}'. Use 'builtin' or 'inline'"
            )

    return reward_funcs

Callers 1

_do_trainingMethod · 0.90

Calls 5

compile_inline_rewardFunction · 0.85
keysMethod · 0.80
appendMethod · 0.80
getMethod · 0.45
itemsMethod · 0.45

Tested by

no test coverage detected