| 60 | |
| 61 | |
def extract_node_text_content(node_list, markdown_lines):
    """Build heading nodes with their level and the text of their section.

    Args:
        node_list: Iterable of dicts, each providing a 1-based ``'line_num'``
            into ``markdown_lines`` and a ``'node_title'``.
        markdown_lines: The markdown document as a list of lines.

    Returns:
        A list of newly created dicts (the input dicts are not modified) with
        the keys ``'title'``, ``'line_num'``, ``'level'`` (number of leading
        ``#`` characters, capped at 6 by the pattern) and ``'text'``. The text
        spans from the node's own heading line up to, but not including, the
        heading line of the next retained node, or to the end of the document
        for the last node; it is stripped of surrounding whitespace.

    Entries whose line number lies outside the document, or whose line does
    not start with ``#``, are reported with a printed warning and skipped.
    """
    header_pattern = re.compile(r'^(#{1,6})')
    total_lines = len(markdown_lines)

    all_nodes = []
    for node in node_list:
        line_num = node['line_num']

        # Line numbers are 1-based. Without this guard, 0 or a negative value
        # would silently wrap around to the end of the document through
        # negative indexing, and a value past the end would raise IndexError.
        if not 1 <= line_num <= total_lines:
            print(f"Warning: Line {line_num} is out of range for a document with {total_lines} lines")
            continue

        line_content = markdown_lines[line_num - 1]
        header_match = header_pattern.match(line_content)

        if header_match is None:
            print(f"Warning: Line {node['line_num']} does not contain a valid header: '{line_content}'")
            continue

        all_nodes.append({
            'title': node['node_title'],
            'line_num': line_num,
            'level': len(header_match.group(1)),
        })

    # NOTE(review): section boundaries assume node_list is ordered by ascending
    # line_num; an out-of-order successor yields an empty slice -- confirm
    # against the caller.
    for i, node in enumerate(all_nodes):
        start_line = node['line_num'] - 1
        if i + 1 < len(all_nodes):
            # Stop just before the next retained heading line.
            end_line = all_nodes[i + 1]['line_num'] - 1
        else:
            end_line = total_lines

        node['text'] = '\n'.join(markdown_lines[start_line:end_line]).strip()
    return all_nodes
| 88 | |
| 89 | def update_node_list_with_text_token_count(node_list, model=None): |
| 90 | |