MCPcopy
hub / github.com/garrytan/gstack / runSkillTest

Function runSkillTest

test/helpers/session-runner.ts:120–378  ·  view source on GitHub ↗
(options: {
  prompt: string;
  workingDirectory: string;
  maxTurns?: number;
  allowedTools?: string[];
  timeout?: number;
  testName?: string;
  runId?: string;
  /** Model to use. Defaults to claude-sonnet-4-6 (overridable via EVALS_MODEL env). */
  model?: string;
  /** Extra env vars merged into the spawned claude -p process. Useful for
   *  per-test GSTACK_HOME overrides so the test doesn't have to spell out
   *  env setup in the prompt itself. */
  env?: Record<string, string>;
})

Source from the content-addressed store, hash-verified

118// --- Main runner ---
119
120export async function runSkillTest(options: {
121 prompt: string;
122 workingDirectory: string;
123 maxTurns?: number;
124 allowedTools?: string[];
125 timeout?: number;
126 testName?: string;
127 runId?: string;
128 /** Model to use. Defaults to claude-sonnet-4-6 (overridable via EVALS_MODEL env). */
129 model?: string;
130 /** Extra env vars merged into the spawned claude -p process. Useful for
131 * per-test GSTACK_HOME overrides so the test doesn't have to spell out
132 * env setup in the prompt itself. */
133 env?: Record<string, string>;
134}): Promise<SkillTestResult> {
135 const {
136 prompt,
137 workingDirectory,
138 maxTurns = 15,
139 allowedTools = ['Bash', 'Read', 'Write'],
140 timeout = 120_000,
141 testName,
142 runId,
143 env: extraEnv,
144 } = options;
145 const model = options.model ?? process.env.EVALS_MODEL ?? 'claude-sonnet-4-6';
146
147 const startTime = Date.now();
148 const startedAt = new Date().toISOString();
149
150 // Set up per-run log directory if runId is provided
151 let runDir: string | null = null;
152 const safeName = testName ? sanitizeTestName(testName) : null;
153 if (runId) {
154 try {
155 runDir = path.join(PROJECT_DIR, 'e2e-runs', runId);
156 fs.mkdirSync(runDir, { recursive: true });
157 } catch { /* non-fatal */ }
158 }
159
160 // Spawn claude -p with streaming NDJSON output. Prompt piped via stdin to
161 // avoid shell escaping issues. --verbose is required for stream-json mode.
162 const args = [
163 '-p',
164 '--model', model,
165 '--output-format', 'stream-json',
166 '--verbose',
167 '--dangerously-skip-permissions',
168 '--max-turns', String(maxTurns),
169 '--allowed-tools', ...allowedTools,
170 ];
171 // Hermetic children get zero MCP servers (no --mcp-config is passed).
172 // Gated on the same call-time check as the env scrub so EVALS_HERMETIC=0
173 // restores operator MCP along with the operator env.
174 if (isHermeticEnabled()) args.push('--strict-mcp-config');
175
176 // Write prompt to a temp file OUTSIDE workingDirectory to avoid race conditions
177 // where afterAll cleanup deletes the dir before cat reads the file (especially

Calls 8

isHermeticEnabled · Function · 0.90
hermeticChildEnv · Function · 0.90
sanitizeTestName · Function · 0.85
atomicWriteSync · Function · 0.85
parseNDJSON · Function · 0.85
truncate · Function · 0.70
push · Method · 0.45
text · Method · 0.45

Tested by 4

detectVia · Function · 0.72
checkCodexOffering · Function · 0.72
runPlantedBugEval · Function · 0.72
runPlantedBugEval · Function · 0.72