()
| 4733 | } |
| 4734 | |
| 4735 | async evaluate(): Promise<TEvaluation> { |
| 4736 | // Initialize OTEL SDK if tracing is enabled |
| 4737 | // Check env flag, test suite level, default test metadata, and per-test metadata |
| 4738 | const tracingEnabled = |
| 4739 | getEnvBool('PROMPTFOO_TRACING_ENABLED', false) || |
| 4740 | this.testSuite.tracing?.enabled === true || |
| 4741 | (typeof this.testSuite.defaultTest === 'object' && |
| 4742 | this.testSuite.defaultTest?.metadata?.tracingEnabled === true) || |
| 4743 | this.testSuite.tests?.some((t) => t.metadata?.tracingEnabled === true); |
| 4744 | let otelInitialized = false; |
| 4745 | let otlpReceiverAcquired = false; |
| 4746 | |
| 4747 | let evaluationError: unknown; |
| 4748 | try { |
| 4749 | otlpReceiverAcquired = await startOtlpReceiverIfNeeded(this.testSuite, this.store.id); |
| 4750 | if (tracingEnabled) { |
| 4751 | logger.debug('[Evaluator] Initializing OTEL SDK for tracing'); |
| 4752 | const otelConfig = getDefaultOtelConfig(); |
| 4753 | initializeOtel(otelConfig); |
| 4754 | otelInitialized = true; |
| 4755 | } |
| 4756 | |
| 4757 | return await this._runEvaluation(); |
| 4758 | } catch (error) { |
| 4759 | evaluationError = error; |
| 4760 | throw error; |
| 4761 | } finally { |
| 4762 | // Close the JSONL writers first, before the (possibly multi-second) OTEL / provider |
| 4763 | // teardown below, so the streamed file is fully flushed before the post-run rewrite |
| 4764 | // reads it back and the file handle is released promptly. allSettled so one writer's |
| 4765 | // close failure neither blocks cleanup nor masks another writer's error. |
| 4766 | const writerCloseResults = await Promise.allSettled( |
| 4767 | this.fileWriters.map((writer) => writer.close()), |
| 4768 | ); |
| 4769 | const writerCloseErrors = writerCloseResults.flatMap((result) => |
| 4770 | result.status === 'rejected' ? [result.reason] : [], |
| 4771 | ); |
| 4772 | |
| 4773 | let cleanupError: unknown; |
| 4774 | try { |
| 4775 | // Flush and shutdown OTEL SDK |
| 4776 | if (otelInitialized) { |
| 4777 | logger.debug('[Evaluator] Flushing OTEL spans...'); |
| 4778 | await flushOtel(); |
| 4779 | await shutdownOtel(); |
| 4780 | } |
| 4781 | |
| 4782 | if (otlpReceiverAcquired && isOtlpReceiverStarted()) { |
| 4783 | // Add a delay to allow providers to finish exporting spans |
| 4784 | logger.debug('[Evaluator] Waiting for span exports to complete...'); |
| 4785 | await sleep(3000); |
| 4786 | } |
| 4787 | await stopOtlpReceiverIfNeeded(otlpReceiverAcquired, this.store.id); |
| 4788 | |
| 4789 | // Clean up Python worker pools to prevent resource leaks |
| 4790 | await providerRegistry.shutdownAll(); |
| 4791 | |
| 4792 | // Log rate limit metrics for debugging before cleanup |
no test coverage detected