hub / github.com/HKUDS/MiniRAG / _build_global_query_context

Function _build_global_query_context

minirag/operate.py:743–836 · view source on GitHub ↗

(
    keywords,
    knowledge_graph_inst: BaseGraphStorage,
    entities_vdb: BaseVectorStorage,
    relationships_vdb: BaseVectorStorage,
    text_chunks_db: BaseKVStorage[TextChunkSchema],
    query_param: QueryParam,
)

Source from the content-addressed store, hash-verified

741
742
743	async def _build_global_query_context(
744	keywords,
745	knowledge_graph_inst: BaseGraphStorage,
746	entities_vdb: BaseVectorStorage,
747	relationships_vdb: BaseVectorStorage,
748	text_chunks_db: BaseKVStorage[TextChunkSchema],
749	query_param: QueryParam,
750	):
751	results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
752
753	if not len(results):
754	return None
755
756	edge_datas = await asyncio.gather(
757	*[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results]
758	)
759
760	if not all([n is not None for n in edge_datas]):
761	logger.warning("Some edges are missing, maybe the storage is damaged")
762	edge_degree = await asyncio.gather(
763	*[knowledge_graph_inst.edge_degree(r["src_id"], r["tgt_id"]) for r in results]
764	)
765	edge_datas = [
766	{"src_id": k["src_id"], "tgt_id": k["tgt_id"], "rank": d, **v}
767	for k, v, d in zip(results, edge_datas, edge_degree)
768	if v is not None
769	]
770	edge_datas = sorted(
771	edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
772	)
773	edge_datas = truncate_list_by_token_size(
774	edge_datas,
775	key=lambda x: x["description"],
776	max_token_size=query_param.max_token_for_global_context,
777	)
778
779	use_entities = await _find_most_related_entities_from_relationships(
780	edge_datas, query_param, knowledge_graph_inst
781	)
782	use_text_units = await _find_related_text_unit_from_relationships(
783	edge_datas, query_param, text_chunks_db, knowledge_graph_inst
784	)
785	logger.info(
786	f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
787	)
788	relations_section_list = [
789	["id", "source", "target", "description", "keywords", "weight", "rank"]
790	]
791	for i, e in enumerate(edge_datas):
792	relations_section_list.append(
793	[
794	i,
795	e["src_id"],
796	e["tgt_id"],
797	e["description"],
798	e["keywords"],
799	e["weight"],
800	e["rank"],

Callers 2

global_query · Function · 0.85

hybrid_query · Function · 0.85

Calls 8

truncate_list_by_token_size · Function · 0.85

_find_most_related_entities_from_relationships · Function · 0.85

_find_related_text_unit_from_relationships · Function · 0.85

list_of_list_to_csv · Function · 0.85

get · Method · 0.80

query · Method · 0.45

get_edge · Method · 0.45

edge_degree · Method · 0.45

Tested by

no test coverage detected