(ctx context.Context, p Probe)
| 229 | func (c *ScoreClassifier) Name() string { return ClassifierScore } |
| 230 | |
| 231 | func (c *ScoreClassifier) Classify(ctx context.Context, p Probe) (Decision, error) { |
| 232 | start := time.Now() |
| 233 | |
| 234 | // Trim oldest turns until the rendered prompt fits the classifier's |
| 235 | // context. Cache-keyed on the trimmed text so conversations that |
| 236 | // trim to the same tail share an entry. |
| 237 | userText := trimmedProbeText(p, c.budget, func(joined string) (string, error) { |
| 238 | return c.renderer(c.systemPrompt, joined) |
| 239 | }) |
| 240 | |
| 241 | key := cacheKey(userText) |
| 242 | if hit, ok := c.cache.get(key); ok { |
| 243 | return Decision{Labels: hit, Score: 1.0, Latency: time.Since(start)}, nil |
| 244 | } |
| 245 | prompt, err := c.renderer(c.systemPrompt, userText) |
| 246 | if err != nil { |
| 247 | return errDecision(start, fmt.Errorf("score classify: render prompt: %w", err)) |
| 248 | } |
| 249 | results, err := c.scorer.Score(ctx, prompt, c.candidates) |
| 250 | if err != nil { |
| 251 | xlog.Warn("router: score classifier failed", "error", err, "labels", c.labelOrder) |
| 252 | return errDecision(start, fmt.Errorf("score classify: %w", err)) |
| 253 | } |
| 254 | if len(results) != len(c.labelOrder) { |
| 255 | return errDecision(start, fmt.Errorf("score classify: scorer returned %d results for %d policies", len(results), len(c.labelOrder))) |
| 256 | } |
| 257 | |
| 258 | // Convert per-candidate joint log-probs to the softmax inputs the |
| 259 | // activation threshold reads. Default mode (raw) is on-distribution |
| 260 | // for Arch-Router: longer candidates score lower for legitimate |
| 261 | // reasons (the model assigns less probability to outputs that span |
| 262 | // more tokens). Mean normalisation is available for operators with |
| 263 | // highly uneven label token counts who'd rather spend the off- |
| 264 | // distribution cost than the length bias. |
| 265 | logProbs := make([]float64, len(results)) |
| 266 | nonZero := 0 |
| 267 | for i, r := range results { |
| 268 | if r.NumTokens == 0 { |
| 269 | logProbs[i] = math.Inf(-1) |
| 270 | continue |
| 271 | } |
| 272 | nonZero++ |
| 273 | switch c.normalization { |
| 274 | case ScoreNormalizationMean: |
| 275 | logProbs[i] = r.LogProb / float64(r.NumTokens) |
| 276 | default: |
| 277 | logProbs[i] = r.LogProb |
| 278 | } |
| 279 | } |
| 280 | // All-zero NumTokens means the backend never tokenised any |
| 281 | // candidate — almost certainly a Scorer regression (forgot to |
| 282 | // populate NumTokens) rather than a real distribution. Without |
| 283 | // this check softmax degenerates to uniform 1/N, every label |
| 284 | // clears the activation threshold, and the router silently treats |
| 285 | // the bug as "multi-label intent". Fail loud instead so the |
| 286 | // operator sees the cause. |
| 287 | if nonZero == 0 { |
| 288 | return errDecision(start, fmt.Errorf("score classify: backend returned zero tokens for every candidate (scorer regression?)")) |
nothing calls this directly
no test coverage detected