MCPcopy
hub / github.com/TanStack/ai / main

Function main

packages/ai-code-mode/models-eval/run-eval.ts:731–958  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

729}
730
731async function main() {
732 const {
733 ollamaOnly,
734 noJudge,
735 captureOnly,
736 judgeLatest,
737 rejudge,
738 modelFilter,
739 } = parseArgs(process.argv.slice(2))
740
741 mkdirSync(LOG_DIR, { recursive: true })
742 if (judgeLatest) {
743 await judgeLatestSession(rejudge)
744 return
745 }
746
747 const effectiveNoJudge = noJudge || captureOnly
748 const sessionStartedAt = new Date().toISOString()
749 console.log(
750 `[models-eval] Run logs → ${relative(process.cwd(), LOG_DIR) || LOG_DIR} (gitignored)`,
751 )
752
753 if (!effectiveNoJudge && !process.env.ANTHROPIC_API_KEY) {
754 console.warn(
755 '[models-eval] ANTHROPIC_API_KEY is not set; judging will fail. Use --no-judge to skip, or set the key.',
756 )
757 }
758
759 const gold = loadGoldFile()
760 const query = gold.prompt
761 const goldReport = extractReportTextFromGold(gold.messages)
762
763 const { tool, systemPrompt } = getCodeModeTools()
764 const tools = [tool, getSchemaInfoTool]
765 const systemPrompts = [DATABASE_DEMO_SYSTEM_PROMPT, systemPrompt]
766
767 let models = ollamaOnly
768 ? EVAL_MODELS.filter((m) => m.model.startsWith('ollama:'))
769 : EVAL_MODELS
770
771 if (modelFilter && modelFilter.size > 0) {
772 models = models.filter((m) => modelFilter.has(m.model))
773 if (models.length === 0) {
774 console.error(
775 '[models-eval] No models matched --models filter. Check eval-config model ids.',
776 )
777 process.exit(1)
778 }
779 }
780
781 const rows: Array<EvalRow> = []
782
783 for (const entry of models) {
784 const { provider, modelId } = parseModelId(entry.model)
785 const modelCategory = getModelCategory(entry.model)
786 console.log(`\n→ Running: ${entry.name} (${entry.model})`)
787 const adapter = getTextAdapter(provider, modelId)
788 const t0 = Date.now()

Callers 1

run-eval.ts · File · 0.70

Calls 15

process · Method · 0.95
getMessages · Method · 0.95
parseModelId · Function · 0.90
getModelCategory · Function · 0.90
normalizeToUIMessage · Function · 0.90
chat · Function · 0.90
maxIterations · Function · 0.90
computeMetrics · Function · 0.90
judgeReports · Function · 0.90
formatTypescriptEvidence · Function · 0.90
judgeLatestSession · Function · 0.85
loadGoldFile · Function · 0.85

Tested by

no test coverage detected