MCPcopy
hub / github.com/langroid/langroid / test_vector_stores_context_window

Function test_vector_stores_context_window

tests/main/test_vector_stores.py:401–454  ·  view source on GitHub ↗

Test whether retrieving context-window around matches is working.

(vecdb)

Source from the content-addressed store, hash-verified

399 indirect=True,
400)
def test_vector_stores_context_window(vecdb):
    """Verify that a top match can be expanded with its neighboring chunks.

    Six short sentences are joined with blank lines into one document, split
    by the parser, and stored in a freshly (re)created collection. The single
    best hit for a giraffe question is then widened via
    ``add_context_window(..., neighbors=2)``, and the widened content must
    contain the Cats..Owls sentences in their original order.
    """
    # Insertion order defines the order of the sentences in the document.
    sentences = {
        "CATS": "Cats are quiet and clean.",
        "DOGS": "Dogs are noisy and messy.",
        "GIRAFFES": "Giraffes are tall and quiet.",
        "ELEPHANTS": "Elephants are big and noisy.",
        "OWLS": "Owls are quiet and nocturnal.",
        "BATS": "Bats are nocturnal and noisy.",
    }
    source_doc = Document(
        content="\n\n".join(sentences.values()),
        metadata=DocMetaData(id="0"),
    )

    parser = Parser(
        ParsingConfig(
            splitter=Splitter.SIMPLE,
            n_neighbor_ids=2,
            chunk_size=1,
            max_chunks=20,
            min_chunk_chars=3,
            discard_chunk_chars=1,
        )
    )
    chunks = parser.split([source_doc])

    # Collection name is keyed on the xdist worker id (or "main" when unset).
    worker = os.environ.get('PYTEST_XDIST_WORKER', 'main')
    vecdb.create_collection(collection_name=f"testcw-{worker}", replace=True)
    vecdb.add_documents(chunks)

    # Retrieve the single best match, then widen it with its neighbors.
    hits = vecdb.similar_texts_with_scores("What are Giraffes like?", k=1)
    hits = vecdb.add_context_window(hits, neighbors=2)

    assert len(hits) == 1
    window_doc, _score = hits[0]

    # Every sentence from Cats through Owls must be present in the window.
    expected = [
        sentences[key]
        for key in ("CATS", "DOGS", "GIRAFFES", "ELEPHANTS", "OWLS")
    ]
    missing = [s for s in expected if s not in window_doc.content]
    assert not missing

    # check they are in the right sequence
    animal_names = ["Cats", "Dogs", "Giraffes", "Elephants", "Owls"]
    positions = list(map(window_doc.content.index, animal_names))
    assert positions == sorted(positions)
455
456
457@pytest.mark.parametrize(

Callers

nothing calls this directly

Calls 10

split (Method) · 0.95
Document (Class) · 0.90
DocMetaData (Class) · 0.90
ParsingConfig (Class) · 0.90
Parser (Class) · 0.90
get (Method) · 0.80
create_collection (Method) · 0.45
add_documents (Method) · 0.45
add_context_window (Method) · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…