MCPcopy
hub / github.com/aiming-lab/MetaClaw / prm_judge

Function prm_judge

scripts/run_memory_ablation.py:106–150  ·  view source on GitHub ↗

Rate response quality 0-10 via LLM-as-judge.

(context: str, question: str, response: str)

Source from the content-addressed store, hash-verified

104# PRM Judge (single call per response, no max_tokens) #
105# ------------------------------------------------------------------ #
def _parse_judge_reply(text: str) -> dict:
    """Convert the judge model's raw reply into a normalized score dict.

    Args:
        text: The message content returned by the judge model.

    Returns:
        ``{"score": <float in [0, 1]>, "reasoning": <value from the reply>}``.
        A missing ``score`` key defaults to 5 (i.e. 0.5 after normalization)
        and a missing ``reasoning`` key defaults to ``""``.

    Raises:
        ValueError: If no valid JSON can be decoded (``json.JSONDecodeError``
            is a ``ValueError``), the score is not numeric, or it is NaN.
        TypeError, AttributeError: If the decoded payload is not shaped like
            ``{"score": <number>, ...}``.
    """
    import math  # local import: the block cannot assume a file-level one

    # When the reply is wrapped in a Markdown fence, prefer the first fenced
    # segment -- but only if it actually holds an object, so a stray trailing
    # fence does not discard the JSON that precedes it.
    if "```" in text:
        fenced = text.split("```")[1]
        if "{" in fenced:
            text = fenced

    # Keep only the outermost {...} span. This drops a leading language tag
    # ("json", "JSON", ...) as well as any prose the model put around the
    # object despite being told to reply with JSON only.
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]

    result = json.loads(text)
    raw = float(result.get("score", 5))
    if math.isnan(raw):
        # json.loads accepts the literal NaN; it cannot be clamped sensibly.
        raise ValueError("judge returned a NaN score")

    # The judge is asked for 0-10 but nothing enforces it; clamp before
    # normalizing so the returned score always lies in [0, 1].
    clamped = min(max(raw, 0.0), 10.0)
    return {"score": clamped / 10.0, "reasoning": result.get("reasoning", "")}


async def prm_judge(context: str, question: str, response: str) -> dict:
    """Rate response quality 0-10 via LLM-as-judge.

    Posts a single-message chat completion request to
    ``{AZURE_API_BASE}/chat/completions`` asking the model to grade
    ``response`` and to answer with a small JSON object.

    Args:
        context: Text inserted into the prompt as context from previous
            sessions.
        question: The user question the response is answering.
        response: The response to grade; only its first 800 characters are
            included in the prompt.

    Returns:
        A dict with two keys:

        * ``"score"``: the judge's 0-10 rating divided by 10 and clamped to
          ``[0, 1]``, or ``0.5`` when no rating could be obtained.
        * ``"reasoning"``: the judge's explanation, or one of the fallback
          markers ``"http_error_<status>"``, ``"parse_error"`` or
          ``"rate_limited"``.

        The function does not raise for request or parsing failures; they are
        logged as warnings and mapped to the neutral 0.5 fallback.
    """
    prompt = f"""Rate this AI response on a scale of 0 to 10.
Context from previous sessions: {context}
User question: {question}
Response: {response[:800]}
Reply ONLY with JSON: {{"score": <0-10>, "reasoning": "<brief>"}}"""

    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                resp = await client.post(
                    f"{AZURE_API_BASE}/chat/completions",
                    json={
                        "model": AZURE_MODEL,
                        "messages": [{"role": "user", "content": prompt}],
                        "temperature": 0.1,
                        "max_completion_tokens": 150,
                    },
                    headers={
                        "Authorization": f"Bearer {AZURE_API_KEY}",
                        "Content-Type": "application/json",
                    },
                )
                resp.raise_for_status()
                text = resp.json()["choices"][0]["message"]["content"].strip()
                return _parse_judge_reply(text)
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 429:
                if attempt + 1 < max_attempts:
                    # Linear backoff: 15s, then 30s.
                    wait = 15 * (attempt + 1)
                    logger.warning(" PRM judge rate limited, waiting %ds...", wait)
                    await asyncio.sleep(wait)
                else:
                    # No attempt left, so sleeping again would only delay the
                    # fallback returned below.
                    logger.warning(
                        " PRM judge rate limited, giving up after %d attempts",
                        max_attempts,
                    )
                continue
            logger.warning(" PRM judge HTTP error: %s", e)
            return {"score": 0.5, "reasoning": f"http_error_{e.response.status_code}"}
        except Exception as e:
            # NOTE(review): this catch-all also covers transport failures
            # (timeouts, connection errors) and unexpected payload shapes, not
            # just JSON decoding; all are reported as "parse_error" and are
            # not retried. The logged exception carries the real cause.
            logger.warning(" PRM judge error: %s", e)
            return {"score": 0.5, "reasoning": "parse_error"}

    # Only reached when every attempt was answered with HTTP 429.
    return {"score": 0.5, "reasoning": "rate_limited"}
151
152
153# ------------------------------------------------------------------ #

Callers 1

run_conditionFunction · 0.70

Calls 2

getMethod · 0.45
warningMethod · 0.45

Tested by

no test coverage detected