hub / github.com/promptfoo/promptfoo / createEvalWithResults

Function createEvalWithResults

test/models/evalPerformance.test.ts:31–85 · view source on GitHub ↗

Helper to create an eval and add results for provider × test combinations. Returns the eval and the expected counts.

(numProviders: number, numTests: number)

Source from the content-addressed store, hash-verified

29	* Returns the eval and the expected counts.
30	*/
async function createEvalWithResults(numProviders: number, numTests: number) {
  const promptText = 'Test prompt';

  // Every result row carries its own token-usage object, so build a fresh one per use.
  const makeTokenUsage = () => ({ total: 10, prompt: 5, completion: 5, cached: 0 });

  // Provider ids and test inputs are numbered from 1; the result indices below start at 0.
  const providerConfigs = Array.from({ length: numProviders }, (_, n) => ({
    id: `provider-${n + 1}`,
  }));
  const testCases = Array.from({ length: numTests }, (_, n) => ({
    vars: { input: `test${n + 1}` },
  }));

  const evalRecord = await Eval.create(
    { providers: providerConfigs, prompts: [promptText], tests: testCases },
    [{ raw: promptText, label: promptText }],
  );

  // One passing result per (provider, test) pair, written one at a time:
  // each addResult call is awaited before the next one starts.
  for (let p = 0; p < numProviders; p++) {
    const providerNumber = p + 1;

    for (let t = 0; t < numTests; t++) {
      const inputValue = `test${t + 1}`;

      await evalRecord.addResult({
        description: `test-${p}-${t}`,
        promptIdx: 0,
        testIdx: t,
        testCase: { vars: { input: inputValue } },
        promptId: 'test-prompt',
        provider: { id: `provider-${providerNumber}`, label: `Provider ${providerNumber}` },
        prompt: { raw: promptText, label: promptText },
        vars: { input: inputValue },
        response: {
          output: `response-${p}-${t}`,
          tokenUsage: makeTokenUsage(),
        },
        error: null,
        failureReason: ResultFailureReason.NONE,
        success: true,
        score: 1,
        latencyMs: 100,
        gradingResult: {
          pass: true,
          score: 1,
          reason: 'Pass',
          namedScores: {},
          tokensUsed: makeTokenUsage(),
          componentResults: [],
        },
        namedScores: {},
        cost: 0.001,
        metadata: {},
      });
    }
  }

  // Each provider reuses the same testIdx values, so the number of distinct test
  // indices is numTests while the number of stored rows is the full cross product.
  const expectedTotalRowCount = numProviders * numTests;
  return {
    eval_: evalRecord,
    expectedDistinctCount: numTests,
    expectedTotalRowCount,
  };
}
86
87	describe('getCachedResultsCount', () => {
88	it('should count distinct test indices (unique test cases)', async () => {

Callers 1

evalPerformance.test.ts — File · 0.85

Calls 2

create — Method · 0.45

addResult — Method · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…