(
keywords,
knowledge_graph_inst: BaseGraphStorage,
entities_vdb: BaseVectorStorage,
relationships_vdb: BaseVectorStorage,
text_chunks_db: BaseKVStorage[TextChunkSchema],
query_param: QueryParam,
)
| 741 | |
| 742 | |
| 743 | async def _build_global_query_context( |
| 744 | keywords, |
| 745 | knowledge_graph_inst: BaseGraphStorage, |
| 746 | entities_vdb: BaseVectorStorage, |
| 747 | relationships_vdb: BaseVectorStorage, |
| 748 | text_chunks_db: BaseKVStorage[TextChunkSchema], |
| 749 | query_param: QueryParam, |
| 750 | ): |
| 751 | results = await relationships_vdb.query(keywords, top_k=query_param.top_k) |
| 752 | |
| 753 | if not len(results): |
| 754 | return None |
| 755 | |
| 756 | edge_datas = await asyncio.gather( |
| 757 | *[knowledge_graph_inst.get_edge(r["src_id"], r["tgt_id"]) for r in results] |
| 758 | ) |
| 759 | |
| 760 | if not all([n is not None for n in edge_datas]): |
| 761 | logger.warning("Some edges are missing, maybe the storage is damaged") |
| 762 | edge_degree = await asyncio.gather( |
| 763 | *[knowledge_graph_inst.edge_degree(r["src_id"], r["tgt_id"]) for r in results] |
| 764 | ) |
| 765 | edge_datas = [ |
| 766 | {"src_id": k["src_id"], "tgt_id": k["tgt_id"], "rank": d, **v} |
| 767 | for k, v, d in zip(results, edge_datas, edge_degree) |
| 768 | if v is not None |
| 769 | ] |
| 770 | edge_datas = sorted( |
| 771 | edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True |
| 772 | ) |
| 773 | edge_datas = truncate_list_by_token_size( |
| 774 | edge_datas, |
| 775 | key=lambda x: x["description"], |
| 776 | max_token_size=query_param.max_token_for_global_context, |
| 777 | ) |
| 778 | |
| 779 | use_entities = await _find_most_related_entities_from_relationships( |
| 780 | edge_datas, query_param, knowledge_graph_inst |
| 781 | ) |
| 782 | use_text_units = await _find_related_text_unit_from_relationships( |
| 783 | edge_datas, query_param, text_chunks_db, knowledge_graph_inst |
| 784 | ) |
| 785 | logger.info( |
| 786 | f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units" |
| 787 | ) |
| 788 | relations_section_list = [ |
| 789 | ["id", "source", "target", "description", "keywords", "weight", "rank"] |
| 790 | ] |
| 791 | for i, e in enumerate(edge_datas): |
| 792 | relations_section_list.append( |
| 793 | [ |
| 794 | i, |
| 795 | e["src_id"], |
| 796 | e["tgt_id"], |
| 797 | e["description"], |
| 798 | e["keywords"], |
| 799 | e["weight"], |
| 800 | e["rank"], |
no test coverage detected