MCPcopy
hub / github.com/garrytan/gstack / combineVerdict

Function combineVerdict

browse/src/security.ts:149–262  ·  view source on GitHub ↗
(signals: LayerSignal[], opts: CombineVerdictOpts = {})

Source from the content-addressed store, hash-verified

147}
148
149export function combineVerdict(signals: LayerSignal[], opts: CombineVerdictOpts = {}): SecurityResult {
150 // Reduce to the strongest signal per layer. For transcript, we'll re-derive
151 // the label below; for scalar layers, max confidence is the layer signal.
152 const byLayerMax: Record<string, number> = {};
153 const transcriptSignals: LayerSignal[] = [];
154 for (const s of signals) {
155 byLayerMax[s.layer] = Math.max(byLayerMax[s.layer] ?? 0, s.confidence);
156 if (s.layer === 'transcript_classifier') transcriptSignals.push(s);
157 }
158 const content = byLayerMax.testsavant_content ?? 0;
159 const deberta = byLayerMax.deberta_content ?? 0;
160 const transcriptMax = byLayerMax.transcript_classifier ?? 0;
161 const canary = byLayerMax.canary ?? 0;
162
163 // Canary leak is deterministic. Never gated through ensemble.
164 if (canary >= 1.0) {
165 return {
166 verdict: 'block',
167 reason: 'canary_leaked',
168 signals,
169 confidence: 1.0,
170 };
171 }
172
173 // Transcript vote: pick the strongest signal (block > warn > none).
174 let transcriptVote: VoteStrength = 'none';
175 for (const s of transcriptSignals) {
176 const v = classifyTranscript(s);
177 if (v === 'block') { transcriptVote = 'block'; break; }
178 if (v === 'warn' && transcriptVote !== 'block') transcriptVote = 'warn';
179 }
180
181 // Scalar-layer votes.
182 const contentBlockVote = content >= THRESHOLDS.WARN;
183 const debertaBlockVote = deberta >= THRESHOLDS.WARN;
184
185 let blockVotes = 0;
186 if (contentBlockVote) blockVotes++;
187 if (debertaBlockVote) blockVotes++;
188 if (transcriptVote === 'block') blockVotes++;
189
190 // Ensemble: 2-of-N block-votes trigger BLOCK.
191 if (blockVotes >= 2) {
192 // Report confidence as the min of the contributing signals (weakest link),
193 // matching v1 behavior for consistency with the review banner.
194 const contributing: number[] = [];
195 if (contentBlockVote) contributing.push(content);
196 if (debertaBlockVote) contributing.push(deberta);
197 if (transcriptVote === 'block') contributing.push(transcriptMax);
198 return {
199 verdict: 'block',
200 reason: 'ensemble_agreement',
201 signals,
202 confidence: Math.min(...contributing),
203 };
204 }
205
206 // Single-layer BLOCK. For tool-output, BLOCK directly; for user-input,

Calls 2

classifyTranscript · Function · 0.85
push · Method · 0.45

Tested by 1

worker · Function · 0.72