MCPcopy
hub / github.com/HKUDS/MiniRAG / _process_single_content

Function _process_single_content

minirag/operate.py:265–329  ·  view source on GitHub ↗
(chunk_key_dp: tuple[str, TextChunkSchema])

Source from the content-addressed store, hash-verified

263 already_relations = 0
264
265 async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
266 nonlocal already_processed, already_entities, already_relations
267 chunk_key = chunk_key_dp[0]
268 chunk_dp = chunk_key_dp[1]
269 content = chunk_dp["content"]
270 hint_prompt = entity_extract_prompt.format(**context_base, input_text=content)
271 final_result = await use_llm_func(hint_prompt)
272
273 history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
274 for now_glean_index in range(entity_extract_max_gleaning):
275 glean_result = await use_llm_func(continue_prompt, history_messages=history)
276
277 history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
278 final_result += glean_result
279 if now_glean_index == entity_extract_max_gleaning - 1:
280 break
281
282 if_loop_result: str = await use_llm_func(
283 if_loop_prompt, history_messages=history
284 )
285 if_loop_result = if_loop_result.strip().strip('"').strip("'").lower()
286 if if_loop_result != "yes":
287 break
288
289 records = split_string_by_multi_markers(
290 final_result,
291 [context_base["record_delimiter"], context_base["completion_delimiter"]],
292 )
293
294 maybe_nodes = defaultdict(list)
295 maybe_edges = defaultdict(list)
296 for record in records:
297 record = re.search(r"\((.*)\)", record)
298 if record is None:
299 continue
300 record = record.group(1)
301 record_attributes = split_string_by_multi_markers(
302 record, [context_base["tuple_delimiter"]]
303 )
304 if_entities = await _handle_single_entity_extraction(
305 record_attributes, chunk_key
306 )
307 if if_entities is not None:
308 maybe_nodes[if_entities["entity_name"]].append(if_entities)
309 continue
310
311 if_relation = await _handle_single_relationship_extraction(
312 record_attributes, chunk_key
313 )
314 if if_relation is not None:
315 maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
316 if_relation
317 )
318 already_processed += 1
319 already_entities += len(maybe_nodes)
320 already_relations += len(maybe_edges)
321 now_ticks = PROMPTS["process_tickers"][
322 already_processed % len(PROMPTS["process_tickers"])

Callers 1

extract_entities · Function · 0.85

Tested by

no test coverage detected