hub / github.com/Doorman11991/smallcode / caseContractGuard

Function caseContractGuard

test/e2e_smoke.js:129–195 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

127	// ─── case 2: contract create + done-guard ───────────────────────────────────
128
129	async function caseContractGuard() {
130	console.log('');
131	console.log(paint('Case 2 — contract create + done-guard', C.bold));
132
133	// Single-turn, multi-step prompt: create a contract with two assertions,
134	// mark only the first one passed, then claim "all done". The guard must
135	// intercept the wrap-up. We expect to see a [CONTRACT-GUARD] injection
136	// OR — if the model recovers correctly after the guard fires — the on-disk
137	// state should show the assertions resolved through the contract tools.
138	const prompt =
139	'Step 1: Use contract_create to declare a Definition of Done with title ' +
140	'"smoke" and these two assertions: "smoke step one passes", "smoke step ' +
141	'two passes". ' +
142	'Step 2: Use contract_assert_pass on a01 with evidence "verified by smoke ' +
143	'test". ' +
144	'Step 3: Reply with the single line "All done — task is complete." (do ' +
145	'NOT mark a02 as passed; leave it pending intentionally).';
146
147	const res = await runAgent(prompt);
148	const merged = (res.stdout + res.stderr);
149	// The fullscreen TUI emits "⚙ <tool> ✓ <ms>" for tool calls. Use the tool
150	// names as the success signal — they're the closest thing to a structured
151	// event we can observe from outside the agent.
152	const toolFired = (name) => new RegExp(`⚙\\s*${name}`).test(merged);
153	const sawCreate = toolFired('contract_create');
154	const sawPass = toolFired('contract_assert_pass');
155	const sawGuard = /CONTRACT-GUARD/.test(merged) || /contract guard:/.test(merged);
156
157	// Inspect the contract on disk for ground truth — the agent's tool calls
158	// should have left a state.json behind. Two valid outcomes:
159	// (a) a01 passed, a02 pending → guard fired, model didn't recover
160	// (b) every assertion resolved → guard fired, model used skip/pass to
161	// recover (this is the correct behaviour for the agent)
162	const contractsRoot = path.join(res.cwd, '.smallcode', 'contracts');
163	let stateOk = false;
164	let stateDetail = '(no state.json)';
165	try {
166	if (fs.existsSync(contractsRoot)) {
167	const ids = fs.readdirSync(contractsRoot).filter((f) => !f.startsWith('.'));
168	if (ids.length > 0) {
169	const state = JSON.parse(fs.readFileSync(path.join(contractsRoot, ids[0], 'state.json'), 'utf-8'));
170	if (state.assertions && state.assertions.length === 2) {
171	const a01 = state.assertions[0].state;
172	const a02 = state.assertions[1].state;
173	const a01Resolved = ['passed', 'skipped'].includes(a01);
174	const a02Resolved = ['passed', 'skipped', 'failed'].includes(a02);
175	// a01 must be marked, and a02 must either still be pending (guard
176	// fired and model honoured the failure) or resolved (guard fired
177	// and model recovered).
178	stateOk = a01Resolved && (a02 === 'pending' || a02Resolved);
179	stateDetail = `a01=${a01} a02=${a02}`;
180	}
181	}
182	}
183	} catch (e) {
184	stateDetail = `(read error: ${e.message})`;
185	}
186

Callers 1

e2e_smoke.jsFile · 0.85

Calls 4

runAgentFunction · 0.85

toolFiredFunction · 0.85

paintFunction · 0.70

checkFunction · 0.70

Tested by

no test coverage detected