MCPcopy
hub / github.com/pathwaycom/pathway / inputs_query

Method inputs_query

python/pathway/xpacks/llm/document_store.py:453–527  ·  view source on GitHub ↗

Query ``DocumentStore`` for the list of input documents.

(
        self, input_queries: pw.Table[InputsQuerySchema]
    )

Source from the content-addressed store, hash-verified

451
452 @pw.table_transformer
453 def inputs_query(
454 self, input_queries: pw.Table[InputsQuerySchema]
455 ) -> pw.Table[InputsResultSchema]:
456 """
457 Query ``DocumentStore`` for the list of input documents.
458 """
459 # TODO: compare this approach to first joining queries to documents, then filtering,
460 # then grouping to get each response.
461 # The "dumb" tuple approach has more work precomputed for an all inputs query
462 all_metas = self.progress_table.reduce(
463 metadatas=pw.reducers.tuple(pw.this.metadata),
464 is_parsed=pw.reducers.tuple(pw.this.is_parsed),
465 )
466
467 input_queries = self.merge_filters(input_queries)
468
469 @pw.udf
470 def format_inputs(
471 metadatas: list[pw.Json] | None,
472 metadata_filter: str | None,
473 return_status: bool,
474 is_parsed: list[bool] | None,
475 ) -> list[pw.Json]:
476 metadatas = metadatas if metadatas is not None else []
477 is_parsed = is_parsed if is_parsed is not None else []
478 assert metadatas is not None
479 assert is_parsed is not None
480
481 def remove_id(m):
482 metadata_dict = m.as_dict()
483 del metadata_dict["_file_id"]
484 return pw.Json(metadata_dict)
485
486 metadatas = [remove_id(m) for m in metadatas]
487 if metadata_filter:
488 metadatas = [
489 m
490 for m in metadatas
491 if jmespath.search(
492 metadata_filter, m.as_dict(), options=_knn_lsh._glob_options
493 )
494 ]
495
496 if return_status:
497 metadatas = [
498 pw.Json(
499 {
500 "_indexing_status": (
501 IndexingStatus.INDEXED
502 if status
503 else IndexingStatus.INGESTED
504 ),
505 **m.as_dict(),
506 }
507 )
508 for (m, status) in zip(metadatas, is_parsed)
509 ]
510

Callers 3

_test_vs · Function · 0.95
list_documents · Method · 0.45
_test_vs · Function · 0.45

Calls 4

merge_filters · Method · 0.95
join_left · Method · 0.80
reduce · Method · 0.45
select · Method · 0.45

Tested by 2

_test_vs · Function · 0.76
_test_vs · Function · 0.36