MCPcopy
hub / github.com/rohitg00/agentmemory / evalBm25Only

Function evalBm25Only

benchmark/quality-eval.ts:123–171  ·  view source on GitHub ↗
(
  observations: CompressedObservation[],
  queries: LabeledQuery[],
)

Source from the content-addressed store, hash-verified

121}
122
/**
 * Evaluates retrieval quality of the BM25-only search path.
 *
 * Builds a fresh `SearchIndex` from `observations`, runs every labeled query
 * against it (top 20 results), and records per-query recall / precision /
 * nDCG / MRR together with the wall-clock latency of the `search` call.
 *
 * `total_tokens_per_query` is an estimate: the mean `estimateTokens` value of
 * the JSON-serialized observations in a small leading sample, multiplied by
 * the mean number of results returned per query.
 *
 * @param observations - Observations to index and to sample for token size.
 * @param queries - Labeled queries; `relevantObsIds` is the ground truth.
 * @returns Aggregated metrics for the "BM25-only" system plus per-query rows.
 */
async function evalBm25Only(
  observations: CompressedObservation[],
  queries: LabeledQuery[],
): Promise<SystemMetrics> {
  // Maximum number of results requested per query (also the largest recall cutoff).
  const topK = 20;
  // Number of leading observations sampled to estimate per-observation token size.
  const tokenSampleSize = 50;

  const index = new SearchIndex();
  for (const obs of observations) index.add(obs);

  const perQuery: QualityMetrics[] = [];

  for (const q of queries) {
    const relevant = new Set(q.relevantObsIds);

    // Time only the search call itself; metric computation is excluded.
    const start = performance.now();
    const results = index.search(q.query, topK);
    const latency = performance.now() - start;

    const retrieved = results.map(r => r.obsId);
    perQuery.push({
      query: q.query,
      category: q.category,
      recall_at_5: recall(retrieved, relevant, 5),
      recall_at_10: recall(retrieved, relevant, 10),
      recall_at_20: recall(retrieved, relevant, 20),
      precision_at_5: precision(retrieved, relevant, 5),
      precision_at_10: precision(retrieved, relevant, 10),
      ndcg_at_10: ndcg(retrieved, relevant, 10),
      mrr: mrr(retrieved, relevant),
      relevant_count: relevant.size,
      retrieved_count: results.length,
      latency_ms: latency,
    });
  }

  // Mean number of results returned per query. Guard the empty-query case so
  // the token estimate is 0 rather than NaN (0 / 0).
  const totalRetrieved = perQuery.reduce((sum, q) => sum + q.retrieved_count, 0);
  const avgRetrievedCount = perQuery.length > 0 ? totalRetrieved / perQuery.length : 0;

  // Mean estimated tokens per observation. Divide by the actual sample length:
  // with fewer than `tokenSampleSize` observations, dividing by the nominal
  // size would understate the average.
  const sample = observations.slice(0, tokenSampleSize);
  const sampleTokens = sample.reduce((sum, o) => sum + estimateTokens(JSON.stringify(o)), 0);
  const avgObsTokens = sample.length > 0 ? sampleTokens / sample.length : 0;

  return {
    system: "BM25-only",
    avg_recall_at_5: avg(perQuery.map(q => q.recall_at_5)),
    avg_recall_at_10: avg(perQuery.map(q => q.recall_at_10)),
    avg_recall_at_20: avg(perQuery.map(q => q.recall_at_20)),
    avg_precision_at_5: avg(perQuery.map(q => q.precision_at_5)),
    avg_precision_at_10: avg(perQuery.map(q => q.precision_at_10)),
    avg_ndcg_at_10: avg(perQuery.map(q => q.ndcg_at_10)),
    avg_mrr: avg(perQuery.map(q => q.mrr)),
    avg_latency_ms: avg(perQuery.map(q => q.latency_ms)),
    total_tokens_per_query: Math.round(avgObsTokens * avgRetrievedCount),
    per_query: perQuery,
  };
}
172
173async function evalDualStream(
174 observations: CompressedObservation[],

Callers 1

mainFunction · 0.85

Calls 9

addMethod · 0.95
searchMethod · 0.95
pushMethod · 0.80
recallFunction · 0.70
precisionFunction · 0.70
ndcgFunction · 0.70
mrrFunction · 0.70
estimateTokensFunction · 0.70
avgFunction · 0.70

Tested by

no test coverage detected