Test whether retrieving context-window around matches is working.
(vecdb)
| 399 | indirect=True, |
| 400 | ) |
def test_vector_stores_context_window(vecdb):
    """Verify that a single search hit can be widened with its neighbor chunks.

    Six short animal sentences are joined with blank lines into one document,
    split by a ``Parser`` configured with ``n_neighbor_ids=2``, and stored in a
    freshly (re)created collection on ``vecdb``. The top-1 match for a question
    about giraffes is then passed through ``add_context_window`` with
    ``neighbors=2``; the resulting content must contain the Cats, Dogs,
    Giraffes, Elephants and Owls sentences, in that order.
    """
    phrases = SimpleNamespace(
        CATS="Cats are quiet and clean.",
        DOGS="Dogs are noisy and messy.",
        GIRAFFES="Giraffes are tall and quiet.",
        ELEPHANTS="Elephants are big and noisy.",
        OWLS="Owls are quiet and nocturnal.",
        BATS="Bats are nocturnal and noisy.",
    )
    full_text = "\n\n".join(vars(phrases).values())
    source_doc = Document(content=full_text, metadata=DocMetaData(id="0"))

    # NOTE(review): chunk_size=1 with the SIMPLE splitter presumably gives one
    # sentence per chunk -- confirm against the Parser implementation.
    parsing_cfg = ParsingConfig(
        splitter=Splitter.SIMPLE,
        n_neighbor_ids=2,
        chunk_size=1,
        max_chunks=20,
        min_chunk_chars=3,
        discard_chunk_chars=1,
    )
    chunks = Parser(parsing_cfg).split([source_doc])

    # Suffix the collection name with the xdist worker id (or "main" when the
    # variable is unset) so each worker gets its own collection name.
    worker_id = os.environ.get('PYTEST_XDIST_WORKER', 'main')
    vecdb.create_collection(
        collection_name=f"testcw-{worker_id}",
        replace=True,
    )
    vecdb.add_documents(chunks)

    # Retrieve the single best match, then expand it with its context window.
    top_hits = vecdb.similar_texts_with_scores("What are Giraffes like?", k=1)
    windowed_hits = vecdb.add_context_window(top_hits, neighbors=2)

    assert len(windowed_hits) == 1
    giraffes, _score = windowed_hits[0]
    window_text = giraffes.content

    # Every sentence expected in the window must be present in full.
    expected_phrases = [
        phrases.CATS,
        phrases.DOGS,
        phrases.GIRAFFES,
        phrases.ELEPHANTS,
        phrases.OWLS,
    ]
    for phrase in expected_phrases:
        assert phrase in window_text

    # The sentences must also appear in their original document order.
    leading_words = ["Cats", "Dogs", "Giraffes", "Elephants", "Owls"]
    positions = [window_text.index(word) for word in leading_words]
    assert positions == sorted(positions)
| 455 | |
| 456 | |
| 457 | @pytest.mark.parametrize( |
nothing calls this directly
no test coverage detected
searching dependent graphs…