MCP · copy
hub / github.com/rohitg00/agentmemory / evalDualStream

Function evalDualStream

benchmark/quality-eval.ts:173–240  ·  view source on GitHub ↗
(
  observations: CompressedObservation[],
  queries: LabeledQuery[],
)

Source from the content-addressed store, hash-verified

171}
172
173async function evalDualStream(
174 observations: CompressedObservation[],
175 queries: LabeledQuery[],
176): Promise<SystemMetrics> {
177 const kv = mockKV();
178 const bm25 = new SearchIndex();
179 const vector = new VectorIndex();
180 const dims = 384;
181
182 for (const obs of observations) {
183 bm25.add(obs);
184 const text = [obs.title, obs.narrative, ...obs.concepts, ...obs.facts].join(" ");
185 vector.add(obs.id, obs.sessionId, deterministicEmbedding(text, dims));
186 await kv.set(`mem:obs:${obs.sessionId}`, obs.id, obs);
187 }
188
189 const mockEmbed: any = {
190 name: "deterministic",
191 dimensions: dims,
192 embed: async (text: string) => deterministicEmbedding(text, dims),
193 embedBatch: async (texts: string[]) => texts.map(t => deterministicEmbedding(t, dims)),
194 };
195
196 const hybrid = new HybridSearch(bm25, vector, mockEmbed, kv as never, 0.4, 0.6, 0);
197 const perQuery: QualityMetrics[] = [];
198
199 for (const q of queries) {
200 const relevant = new Set(q.relevantObsIds);
201 const start = performance.now();
202 const results = await hybrid.search(q.query, 20);
203 const latency = performance.now() - start;
204
205 const retrieved = results.map(r => r.observation.id);
206 perQuery.push({
207 query: q.query,
208 category: q.category,
209 recall_at_5: recall(retrieved, relevant, 5),
210 recall_at_10: recall(retrieved, relevant, 10),
211 recall_at_20: recall(retrieved, relevant, 20),
212 precision_at_5: precision(retrieved, relevant, 5),
213 precision_at_10: precision(retrieved, relevant, 10),
214 ndcg_at_10: ndcg(retrieved, relevant, 10),
215 mrr: mrr(retrieved, relevant),
216 relevant_count: relevant.size,
217 retrieved_count: results.length,
218 latency_ms: latency,
219 });
220 }
221
222 const avgResultTokens = perQuery.reduce((sum, q) => {
223 return sum + q.retrieved_count;
224 }, 0) / perQuery.length;
225 const avgObsTokens2 = observations.slice(0, 50).reduce((s, o) => s + estimateTokens(JSON.stringify(o)), 0) / 50;
226
227 return {
228 system: "Dual-stream (BM25+Vector)",
229 avg_recall_at_5: avg(perQuery.map(q => q.recall_at_5)),
230 avg_recall_at_10: avg(perQuery.map(q => q.recall_at_10)),

Callers 1

main · Function · 0.85

Calls 13

add · Method · 0.95
add · Method · 0.95
search · Method · 0.95
push · Method · 0.80
mockKV · Function · 0.70
deterministicEmbedding · Function · 0.70
recall · Function · 0.70
precision · Function · 0.70
ndcg · Function · 0.70
mrr · Function · 0.70
estimateTokens · Function · 0.70
avg · Function · 0.70

Tested by

no test coverage detected