MCPcopy
hub / github.com/HKUDS/MiniRAG / _build_global_query_context

Function _build_global_query_context

minirag/operate.py:743–836  ·  view source on GitHub ↗
(
    keywords,
    knowledge_graph_inst: BaseGraphStorage,
    entities_vdb: BaseVectorStorage,
    relationships_vdb: BaseVectorStorage,
    text_chunks_db: BaseKVStorage[TextChunkSchema],
    query_param: QueryParam,
)

Source from the content-addressed store, hash-verified

741
742
743 async def _build_global_query_context(
744 keywords,
745 knowledge_graph_inst: BaseGraphStorage,
746 entities_vdb: BaseVectorStorage,
747 relationships_vdb: BaseVectorStorage,
748 text_chunks_db: BaseKVStorage[TextChunkSchema],
749 query_param: QueryParam,
750):
751 results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
752
753 if not len(results):
754 return None
755
756 edge_datas = await asyncio.gather(
757 *[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results]
758 )
759
760 if not all([n is not None for n in edge_datas]):
761 logger.warning("Some edges are missing, maybe the storage is damaged")
762 edge_degree = await asyncio.gather(
763 *[knowledge_graph_inst.edge_degree(r["src_id"], r["tgt_id"]) for r in results]
764 )
765 edge_datas = [
766 {"src_id": k["src_id"], "tgt_id": k["tgt_id"], "rank": d, **v}
767 for k, v, d in zip(results, edge_datas, edge_degree)
768 if v is not None
769 ]
770 edge_datas = sorted(
771 edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
772 )
773 edge_datas = truncate_list_by_token_size(
774 edge_datas,
775 key=lambda x: x["description"],
776 max_token_size=query_param.max_token_for_global_context,
777 )
778
779 use_entities = await _find_most_related_entities_from_relationships(
780 edge_datas, query_param, knowledge_graph_inst
781 )
782 use_text_units = await _find_related_text_unit_from_relationships(
783 edge_datas, query_param, text_chunks_db, knowledge_graph_inst
784 )
785 logger.info(
786 f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
787 )
788 relations_section_list = [
789 ["id", "source", "target", "description", "keywords", "weight", "rank"]
790 ]
791 for i, e in enumerate(edge_datas):
792 relations_section_list.append(
793 [
794 i,
795 e["src_id"],
796 e["tgt_id"],
797 e["description"],
798 e["keywords"],
799 e["weight"],
800 e["rank"],

Callers 2

global_query · Function · 0.85
hybrid_query · Function · 0.85

Calls 8

list_of_list_to_csv · Function · 0.85
get · Method · 0.80
query · Method · 0.45
get_edge · Method · 0.45
edge_degree · Method · 0.45

Tested by

no test coverage detected