Helper to create an eval and add results for provider × test combinations. Returns the eval and the expected counts.
(numProviders: number, numTests: number)
| 29 | * Returns the eval and the expected counts. |
| 30 | */ |
async function createEvalWithResults(numProviders: number, numTests: number) {
  // Creates an eval configured with `numProviders` providers and `numTests`
  // tests, then adds one passing result for every (provider, test) pair.
  //
  // numProviders - how many providers to configure (ids `provider-1`, `provider-2`, ...).
  // numTests     - how many test cases to configure (inputs `test1`, `test2`, ...).
  //
  // Resolves to the eval together with:
  //   expectedDistinctCount - number of distinct test indices that received a result.
  //   expectedTotalRowCount - number of result rows added (providers × tests).
  const providers = Array.from({ length: numProviders }, (_, i) => ({ id: `provider-${i + 1}` }));
  const tests = Array.from({ length: numTests }, (_, i) => ({ vars: { input: `test${i + 1}` } }));

  const eval_ = await Eval.create(
    {
      providers,
      prompts: ['Test prompt'],
      tests,
    },
    [{ raw: 'Test prompt', label: 'Test prompt' }],
  );

  // Walk the same arrays that were placed in the eval config so the results can
  // never disagree with it (the raw numeric arguments may be fractional or
  // negative, in which case `Array.from` and a counting loop would diverge).
  // Each call is awaited before the next one starts, so results are added in a
  // deterministic provider-major order.
  for (const [providerIdx, provider] of providers.entries()) {
    for (const [testIdx, test] of tests.entries()) {
      await eval_.addResult({
        description: `test-${providerIdx}-${testIdx}`,
        promptIdx: 0,
        testIdx,
        // Fresh copies so no result shares object identity with the config.
        testCase: { vars: { ...test.vars } },
        promptId: 'test-prompt',
        provider: { id: provider.id, label: `Provider ${providerIdx + 1}` },
        prompt: { raw: 'Test prompt', label: 'Test prompt' },
        vars: { ...test.vars },
        response: {
          output: `response-${providerIdx}-${testIdx}`,
          tokenUsage: { total: 10, prompt: 5, completion: 5, cached: 0 },
        },
        error: null,
        failureReason: ResultFailureReason.NONE,
        success: true,
        score: 1,
        latencyMs: 100,
        gradingResult: {
          pass: true,
          score: 1,
          reason: 'Pass',
          namedScores: {},
          tokensUsed: { total: 10, prompt: 5, completion: 5, cached: 0 },
          componentResults: [],
        },
        namedScores: {},
        cost: 0.001,
        metadata: {},
      });
    }
  }

  const expectedTotalRowCount = providers.length * tests.length;

  return {
    eval_,
    // A test index only appears among the results when at least one row was
    // added for it; with zero providers (or zero tests) nothing is added.
    expectedDistinctCount: expectedTotalRowCount > 0 ? tests.length : 0,
    expectedTotalRowCount,
  };
}
| 86 | |
| 87 | describe('getCachedResultsCount', () => { |
| 88 | it('should count distinct test indices (unique test cases)', async () => { |
no test coverage detected
searching dependent graphs…