( options: LiveTestOptions, )
| 41 | * Run the live test with a real LLM |
| 42 | */ |
| 43 | export async function runLiveTest( |
| 44 | options: LiveTestOptions, |
| 45 | ): Promise<TestResult> { |
| 46 | const { adapter, verbose = false } = options |
| 47 | |
| 48 | logSection('Live Skills Test') |
| 49 | logInfo(`Using adapter: ${adapter.name} with model: ${adapter.model}`) |
| 50 | |
| 51 | // Create shared storage that persists between phases |
| 52 | const storage = createTestStorage() |
| 53 | const driver = createNodeIsolateDriver({ |
| 54 | memoryLimit: 128, |
| 55 | timeout: 60000, // Longer timeout for real LLM |
| 56 | }) |
| 57 | |
| 58 | const result: TestResult = { |
| 59 | passed: false, |
| 60 | phases: { |
| 61 | phase1: { success: false }, |
| 62 | phase2: { success: false }, |
| 63 | }, |
| 64 | skillCreated: false, |
| 65 | skillUsed: false, |
| 66 | } |
| 67 | |
| 68 | // ========================================================================= |
| 69 | // Phase 1: First run - Create skill using code mode |
| 70 | // ========================================================================= |
| 71 | |
| 72 | logSection('Phase 1: Skill Creation') |
| 73 | logStep(1, 'Running code mode with real LLM (no existing skills)') |
| 74 | |
| 75 | try { |
| 76 | const messages1: Array<ModelMessage> = [ |
| 77 | { |
| 78 | role: 'user', |
| 79 | content: `What is 5 + 3? |
| 80 | |
| 81 | IMPORTANT INSTRUCTIONS: |
| 82 | 1. Use the execute_typescript tool to call external_add_numbers({ a: 5, b: 3 }) to get the answer |
| 83 | 2. After getting the result, use register_skill to save a reusable skill called "add_two_numbers" that wraps this pattern |
| 84 | 3. The skill should accept { a: number, b: number } as input and return the result from external_add_numbers |
| 85 | |
| 86 | Please complete all three steps: execute the code, register the skill, and tell me the answer.`, |
| 87 | }, |
| 88 | ] |
| 89 | |
| 90 | // Get tools and system prompt with skills integration |
| 91 | const { tools: tools1, systemPrompt: systemPrompt1 } = |
| 92 | await codeModeWithSkills({ |
| 93 | config: { |
| 94 | driver, |
| 95 | tools: [addNumbersTool], |
| 96 | timeout: 60000, |
| 97 | memoryLimit: 128, |
| 98 | }, |
| 99 | adapter, |
| 100 | skills: { |
no test coverage detected