MCPcopy
hub / github.com/VectifyAI/PageIndex / extract_node_text_content

Function extract_node_text_content

pageindex/page_index_md.py:62–87  ·  view source on GitHub ↗
(node_list, markdown_lines)

Source from the content-addressed store, hash-verified

60
61
def extract_node_text_content(node_list, markdown_lines):
    """Resolve header nodes against the markdown and attach their section text.

    Each input node is looked up in ``markdown_lines`` by its 1-based
    ``'line_num'``. Nodes whose line starts with one to six ``#`` characters
    are kept; the count of ``#`` becomes the node's ``'level'``. A kept node's
    ``'text'`` spans from its own header line up to (but not including) the
    header line of the next kept node, or to the end of the document for the
    last node, with surrounding whitespace stripped.

    Nodes that cannot be resolved are reported with a printed warning and
    skipped, so their lines end up in the text of the preceding kept node.

    Args:
        node_list: Iterable of dicts with ``'node_title'`` and ``'line_num'``
            keys.
        markdown_lines: The document as a list of lines.

    Returns:
        A new list of dicts with ``'title'``, ``'line_num'``, ``'level'`` and
        ``'text'`` keys. The input dicts are not modified.
    """
    # Only the leading run of '#' is inspected; no space after it is required.
    header_pattern = re.compile(r'^(#{1,6})')
    total_lines = len(markdown_lines)

    all_nodes = []
    for node in node_list:
        line_num = node['line_num']

        # Guard the index: without this, line_num <= 0 would silently wrap
        # around via negative indexing and a too-large value would raise.
        if not 1 <= line_num <= total_lines:
            print(f"Warning: Line {line_num} is out of range for a document of {total_lines} lines")
            continue

        line_content = markdown_lines[line_num - 1]
        header_match = header_pattern.match(line_content)

        if header_match is None:
            print(f"Warning: Line {node['line_num']} does not contain a valid header: '{line_content}'")
            continue

        all_nodes.append({
            'title': node['node_title'],
            'line_num': line_num,
            'level': len(header_match.group(1)),
        })

    for i, node in enumerate(all_nodes):
        start_line = node['line_num'] - 1
        if i + 1 < len(all_nodes):
            # Stop just before the next kept header.
            end_line = all_nodes[i + 1]['line_num'] - 1
        else:
            end_line = total_lines

        node['text'] = '\n'.join(markdown_lines[start_line:end_line]).strip()
    return all_nodes
88
89def update_node_list_with_text_token_count(node_list, model=None):
90

Callers 1

md_to_tree · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected