MCPcopy Index your code
hub / github.com/Doorman11991/smallcode / caseContractGuard

Function caseContractGuard

test/e2e_smoke.js:129–195  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

127// ─── case 2: contract create + done-guard ───────────────────────────────────
128
129async function caseContractGuard() {
130 console.log('');
131 console.log(paint('Case 2 — contract create + done-guard', C.bold));
132
133 // Single-turn, multi-step prompt: create a contract with two assertions,
134 // mark only the first one passed, then claim "all done". The guard must
135 // intercept the wrap-up. We expect to see a [CONTRACT-GUARD] injection
136 // OR — if the model recovers correctly after the guard fires — the on-disk
137 // state should show the assertions resolved through the contract tools.
138 const prompt =
139 'Step 1: Use contract_create to declare a Definition of Done with title ' +
140 '"smoke" and these two assertions: "smoke step one passes", "smoke step ' +
141 'two passes". ' +
142 'Step 2: Use contract_assert_pass on a01 with evidence "verified by smoke ' +
143 'test". ' +
144 'Step 3: Reply with the single line "All done — task is complete." (do ' +
145 'NOT mark a02 as passed; leave it pending intentionally).';
146
147 const res = await runAgent(prompt);
148 const merged = (res.stdout + res.stderr);
149 // The fullscreen TUI emits "⚙ <tool> ✓ <ms>" for tool calls. Use the tool
150 // names as the success signal — they're the closest thing to a structured
151 // event we can observe from outside the agent.
152 const toolFired = (name) => new RegExp(`⚙\\s*${name}`).test(merged);
153 const sawCreate = toolFired('contract_create');
154 const sawPass = toolFired('contract_assert_pass');
155 const sawGuard = /CONTRACT-GUARD/.test(merged) || /contract guard:/.test(merged);
156
157 // Inspect the contract on disk for ground truth — the agent's tool calls
158 // should have left a state.json behind. Two valid outcomes:
159 // (a) a01 passed, a02 pending → guard fired, model didn't recover
160 // (b) every assertion resolved → guard fired, model used skip/pass to
161 // recover (this is the correct behaviour for the agent)
162 const contractsRoot = path.join(res.cwd, '.smallcode', 'contracts');
163 let stateOk = false;
164 let stateDetail = '(no state.json)';
165 try {
166 if (fs.existsSync(contractsRoot)) {
167 const ids = fs.readdirSync(contractsRoot).filter((f) => !f.startsWith('.'));
168 if (ids.length > 0) {
169 const state = JSON.parse(fs.readFileSync(path.join(contractsRoot, ids[0], 'state.json'), 'utf-8'));
170 if (state.assertions && state.assertions.length === 2) {
171 const a01 = state.assertions[0].state;
172 const a02 = state.assertions[1].state;
173 const a01Resolved = ['passed', 'skipped'].includes(a01);
174 const a02Resolved = ['passed', 'skipped', 'failed'].includes(a02);
175 // a01 must be marked, and a02 must either still be pending (guard
176 // fired and model honoured the failure) or resolved (guard fired
177 // and model recovered).
178 stateOk = a01Resolved && (a02 === 'pending' || a02Resolved);
179 stateDetail = `a01=${a01} a02=${a02}`;
180 }
181 }
182 }
183 } catch (e) {
184 stateDetail = `(read error: ${e.message})`;
185 }
186

Callers 1

e2e_smoke.js — File · 0.85

Calls 4

runAgent — Function · 0.85
toolFired — Function · 0.85
paint — Function · 0.70
check — Function · 0.70

Tested by

no test coverage detected