MCPcopy
hub / github.com/promptfoo/promptfoo / evaluate

Method evaluate

src/evaluator.ts:4735–4848  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

4733 }
4734
4735 async evaluate(): Promise<TEvaluation> {
4736 // Initialize OTEL SDK if tracing is enabled
4737 // Check env flag, test suite level, and default test metadata
4738 const tracingEnabled =
4739 getEnvBool('PROMPTFOO_TRACING_ENABLED', false) ||
4740 this.testSuite.tracing?.enabled === true ||
4741 (typeof this.testSuite.defaultTest === 'object' &&
4742 this.testSuite.defaultTest?.metadata?.tracingEnabled === true) ||
4743 this.testSuite.tests?.some((t) => t.metadata?.tracingEnabled === true);
4744 let otelInitialized = false;
4745 let otlpReceiverAcquired = false;
4746
4747 let evaluationError: unknown;
4748 try {
4749 otlpReceiverAcquired = await startOtlpReceiverIfNeeded(this.testSuite, this.store.id);
4750 if (tracingEnabled) {
4751 logger.debug('[Evaluator] Initializing OTEL SDK for tracing');
4752 const otelConfig = getDefaultOtelConfig();
4753 initializeOtel(otelConfig);
4754 otelInitialized = true;
4755 }
4756
4757 return await this._runEvaluation();
4758 } catch (error) {
4759 evaluationError = error;
4760 throw error;
4761 } finally {
4762 // Close the JSONL writers first, before the (possibly multi-second) OTEL / provider
4763 // teardown below, so the streamed file is fully flushed before the post-run rewrite
4764 // reads it back and the file handle is released promptly. allSettled so one writer's
4765 // close failure neither blocks cleanup nor masks another writer's error.
4766 const writerCloseResults = await Promise.allSettled(
4767 this.fileWriters.map((writer) => writer.close()),
4768 );
4769 const writerCloseErrors = writerCloseResults.flatMap((result) =>
4770 result.status === 'rejected' ? [result.reason] : [],
4771 );
4772
4773 let cleanupError: unknown;
4774 try {
4775 // Flush and shutdown OTEL SDK
4776 if (otelInitialized) {
4777 logger.debug('[Evaluator] Flushing OTEL spans...');
4778 await flushOtel();
4779 await shutdownOtel();
4780 }
4781
4782 if (otlpReceiverAcquired && isOtlpReceiverStarted()) {
4783 // Add a delay to allow providers to finish exporting spans
4784 logger.debug('[Evaluator] Waiting for span exports to complete...');
4785 await sleep(3000);
4786 }
4787 await stopOtlpReceiverIfNeeded(otlpReceiverAcquired, this.store.id);
4788
4789 // Clean up Python worker pools to prevent resource leaks
4790 await providerRegistry.shutdownAll();
4791
4792 // Log rate limit metrics for debugging before cleanup

Callers 12

evaluate (Function) · 0.95
index.test.ts (File) · 0.80
updateDerivedMetrics (Function) · 0.80
executeAction (Method) · 0.80
extractResponseFromFrame (Function) · 0.80
getIframeContent (Function) · 0.80
handleApproval (Function) · 0.80
callApi (Method) · 0.80
callApiWithPool (Method) · 0.80
full-eval.ts (File) · 0.80
full-eval.js (File) · 0.80

Calls 15

_runEvaluation (Method) · 0.95
getEnvBool (Function) · 0.90
getDefaultOtelConfig (Function) · 0.90
initializeOtel (Function) · 0.90
flushOtel (Function) · 0.90
shutdownOtel (Function) · 0.90
isOtlpReceiverStarted (Function) · 0.90
sleep (Function) · 0.90
stopOtlpReceiverIfNeeded (Function) · 0.90
sanitizeProviderIdForLog (Function) · 0.90
shutdownAll (Method) · 0.80

Tested by

no test coverage detected