hub / github.com/aiming-lab/MetaClaw / prm_judge

Function prm_judge

scripts/run_memory_ablation.py:106–150 · view source on GitHub ↗

Rate response quality 0-10 via LLM-as-judge.

(context: str, question: str, response: str)

Source from the content-addressed store, hash-verified

104	# PRM Judge (single call per response, no max_tokens) #
105	# ------------------------------------------------------------------ #
def _parse_judge_reply(text: str) -> dict:
    """Convert the judge model's raw reply into a normalized result dict.

    Args:
        text: The message content returned by the judge model. It may be bare
            JSON or JSON wrapped in a Markdown code fence.

    Returns:
        ``{"score": <float in [0, 1]>, "reasoning": <value from the reply>}``.
        A missing ``score`` defaults to 5 (i.e. 0.5 after normalization) and a
        missing ``reasoning`` defaults to ``""``.

    Raises:
        Exception: Any failure to parse (invalid JSON, non-object JSON,
            non-numeric or non-finite score). The caller maps these to its
            ``parse_error`` fallback.
    """
    import math  # local import: this block cannot see the file's import section

    text = text.strip()
    if "```" in text:
        # Keep only the contents of the first fenced block.
        text = text.split("```")[1]
    if text.startswith("json"):
        # Drop the fence's language tag; valid JSON never starts with "json".
        text = text[4:]

    result = json.loads(text)
    if not isinstance(result, dict):
        raise ValueError(f"expected a JSON object, got {type(result).__name__}")

    raw_score = float(result.get("score", 5))
    if not math.isfinite(raw_score):
        # json.loads accepts NaN/Infinity; neither is a usable rating.
        raise ValueError(f"non-finite score: {raw_score!r}")

    # The prompt asks for 0-10, but the model is free to ignore that; clamp so
    # the normalized score always lands in [0, 1].
    clamped = min(max(raw_score, 0.0), 10.0)
    return {"score": clamped / 10.0, "reasoning": result.get("reasoning", "")}


async def prm_judge(context: str, question: str, response: str) -> dict:
    """Rate response quality 0-10 via LLM-as-judge.

    Sends a single grading prompt to ``{AZURE_API_BASE}/chat/completions`` and
    normalizes the returned 0-10 score to the range [0, 1].

    Args:
        context: Text inserted into the prompt as context from previous
            sessions.
        question: The user question the response is answering.
        response: The response to grade. Only its first 800 characters are
            included in the prompt.

    Returns:
        A dict with two keys:

        * ``"score"``: float in [0, 1]. Falls back to ``0.5`` on any failure.
        * ``"reasoning"``: the judge's explanation, or one of the failure
          labels ``"http_error_<status>"``, ``"parse_error"`` (any non-HTTP-status
          failure, including transport errors and malformed replies), or
          ``"rate_limited"`` (every attempt returned HTTP 429).

    This function does not raise for request or parsing failures; they are
    logged as warnings and reported through the fallback result.
    """
    prompt = f"""Rate this AI response on a scale of 0 to 10.
Context from previous sessions: {context}
User question: {question}
Response: {response[:800]}
Reply ONLY with JSON: {{"score": <0-10>, "reasoning": "<brief>"}}"""

    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                resp = await client.post(
                    f"{AZURE_API_BASE}/chat/completions",
                    json={
                        "model": AZURE_MODEL,
                        "messages": [{"role": "user", "content": prompt}],
                        "temperature": 0.1,
                        "max_completion_tokens": 150,
                    },
                    headers={
                        "Authorization": f"Bearer {AZURE_API_KEY}",
                        "Content-Type": "application/json",
                    },
                )
                resp.raise_for_status()
                content = resp.json()["choices"][0]["message"]["content"]
                return _parse_judge_reply(content)
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 429:
                # Linear backoff (15s, 30s) between attempts. Skip the wait
                # after the final attempt: nothing is retried afterwards, so
                # sleeping would only delay the fallback result.
                if attempt + 1 < max_attempts:
                    wait = 15 * (attempt + 1)
                    logger.warning(" PRM judge rate limited, waiting %ds...", wait)
                    await asyncio.sleep(wait)
                continue
            logger.warning(" PRM judge HTTP error: %s", e)
            return {"score": 0.5, "reasoning": f"http_error_{e.response.status_code}"}
        except Exception as e:
            # Deliberate best-effort boundary: a failed judgement must not
            # abort the caller's run, so every other failure yields the
            # neutral fallback score.
            logger.warning(" PRM judge error: %s", e)
            return {"score": 0.5, "reasoning": "parse_error"}

    # Reached only when every attempt was answered with HTTP 429.
    return {"score": 0.5, "reasoning": "rate_limited"}
151
152
153	# ------------------------------------------------------------------ #

Callers 1

run_conditionFunction · 0.70

Calls 2

getMethod · 0.45

warningMethod · 0.45

Tested by

no test coverage detected