MCPcopy
hub / github.com/promptfoo/promptfoo / createEvalWithResults

Function createEvalWithResults

test/models/evalPerformance.test.ts:31–85  ·  view source on GitHub ↗

Helper to create an eval and add results for provider × test combinations. Returns the eval and the expected counts.

(numProviders: number, numTests: number)

Source from the content-addressed store, hash-verified

29 * Returns the eval and the expected counts.
30 */
async function createEvalWithResults(numProviders: number, numTests: number) {
  // Small factories: every call yields a brand-new value, so no two result
  // payloads (or the eval config) ever share an object reference.
  const providerIdFor = (providerIdx: number) => `provider-${providerIdx + 1}`;
  const inputFor = (testIdx: number) => `test${testIdx + 1}`;
  const makePrompt = () => ({ raw: 'Test prompt', label: 'Test prompt' });
  const makeTokenCounts = () => ({ total: 10, prompt: 5, completion: 5, cached: 0 });

  // The eval is configured with one provider entry per provider and one test
  // case per test, plus a single shared prompt.
  const eval_ = await Eval.create(
    {
      providers: Array.from({ length: numProviders }, (_, p) => ({ id: providerIdFor(p) })),
      prompts: ['Test prompt'],
      tests: Array.from({ length: numTests }, (_, t) => ({ vars: { input: inputFor(t) } })),
    },
    [makePrompt()],
  );

  // Insert one passing result per (provider, test) pair. Results are awaited
  // one at a time, provider-major, so they are added in a fixed order.
  for (let p = 0; p < numProviders; p += 1) {
    for (let t = 0; t < numTests; t += 1) {
      const input = inputFor(t);

      await eval_.addResult({
        description: `test-${p}-${t}`,
        promptIdx: 0,
        testIdx: t,
        testCase: { vars: { input } },
        promptId: 'test-prompt',
        provider: { id: providerIdFor(p), label: `Provider ${p + 1}` },
        prompt: makePrompt(),
        vars: { input },
        response: {
          output: `response-${p}-${t}`,
          tokenUsage: makeTokenCounts(),
        },
        error: null,
        failureReason: ResultFailureReason.NONE,
        success: true,
        score: 1,
        latencyMs: 100,
        gradingResult: {
          pass: true,
          score: 1,
          reason: 'Pass',
          namedScores: {},
          tokensUsed: makeTokenCounts(),
          componentResults: [],
        },
        namedScores: {},
        cost: 0.001,
        metadata: {},
      });
    }
  }

  // Each test index appears once per provider: distinct test indices equal
  // numTests, while the total number of stored rows is providers × tests.
  const expectedDistinctCount = numTests;
  const expectedTotalRowCount = numProviders * numTests;

  return { eval_, expectedDistinctCount, expectedTotalRowCount };
}
86
87 describe('getCachedResultsCount', () => {
88 it('should count distinct test indices (unique test cases)', async () => {

Callers 1

Calls 2

createMethod · 0.45
addResultMethod · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…