(chunk_key_dp: tuple[str, TextChunkSchema])
| 263 | already_relations = 0 |
| 264 | |
| 265 | async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]): |
| 266 | nonlocal already_processed, already_entities, already_relations |
| 267 | chunk_key = chunk_key_dp[0] |
| 268 | chunk_dp = chunk_key_dp[1] |
| 269 | content = chunk_dp["content"] |
| 270 | hint_prompt = entity_extract_prompt.format(**context_base, input_text=content) |
| 271 | final_result = await use_llm_func(hint_prompt) |
| 272 | |
| 273 | history = pack_user_ass_to_openai_messages(hint_prompt, final_result) |
| 274 | for now_glean_index in range(entity_extract_max_gleaning): |
| 275 | glean_result = await use_llm_func(continue_prompt, history_messages=history) |
| 276 | |
| 277 | history += pack_user_ass_to_openai_messages(continue_prompt, glean_result) |
| 278 | final_result += glean_result |
| 279 | if now_glean_index == entity_extract_max_gleaning - 1: |
| 280 | break |
| 281 | |
| 282 | if_loop_result: str = await use_llm_func( |
| 283 | if_loop_prompt, history_messages=history |
| 284 | ) |
| 285 | if_loop_result = if_loop_result.strip().strip('"').strip("'").lower() |
| 286 | if if_loop_result != "yes": |
| 287 | break |
| 288 | |
| 289 | records = split_string_by_multi_markers( |
| 290 | final_result, |
| 291 | [context_base["record_delimiter"], context_base["completion_delimiter"]], |
| 292 | ) |
| 293 | |
| 294 | maybe_nodes = defaultdict(list) |
| 295 | maybe_edges = defaultdict(list) |
| 296 | for record in records: |
| 297 | record = re.search(r"\((.*)\)", record) |
| 298 | if record is None: |
| 299 | continue |
| 300 | record = record.group(1) |
| 301 | record_attributes = split_string_by_multi_markers( |
| 302 | record, [context_base["tuple_delimiter"]] |
| 303 | ) |
| 304 | if_entities = await _handle_single_entity_extraction( |
| 305 | record_attributes, chunk_key |
| 306 | ) |
| 307 | if if_entities is not None: |
| 308 | maybe_nodes[if_entities["entity_name"]].append(if_entities) |
| 309 | continue |
| 310 | |
| 311 | if_relation = await _handle_single_relationship_extraction( |
| 312 | record_attributes, chunk_key |
| 313 | ) |
| 314 | if if_relation is not None: |
| 315 | maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append( |
| 316 | if_relation |
| 317 | ) |
| 318 | already_processed += 1 |
| 319 | already_entities += len(maybe_nodes) |
| 320 | already_relations += len(maybe_edges) |
| 321 | now_ticks = PROMPTS["process_tickers"][ |
| 322 | already_processed % len(PROMPTS["process_tickers"]) |
no test coverage detected