| 30 | |
| 31 | |
def extract_nodes_from_markdown(markdown_content):
    """Collect the ATX headers (``#`` .. ``######``) found in a markdown string.

    Headers that appear inside fenced code blocks are ignored.  Fences are
    tracked by marker character and length: a block opened by N backticks
    (or N tildes) is closed only by a line made of at least N of the same
    character with nothing after it.  This keeps ``#`` comment lines inside
    nested or tilde-fenced examples from being reported as headers.

    Args:
        markdown_content: The full markdown document as a single string.

    Returns:
        A ``(node_list, lines)`` tuple.  ``node_list`` holds one dict per
        header, in document order, with keys ``'node_title'`` (the header
        text with surrounding whitespace removed) and ``'line_num'`` (the
        1-based index of the header line).  ``lines`` is
        ``markdown_content.split('\n')``, returned unmodified.
    """
    header_re = re.compile(r'^(#{1,6})\s+(.+)$')
    # Group 1: the run of fence characters; group 2: whatever follows it.
    fence_re = re.compile(r'^(`{3,}|~{3,})(.*)$')

    node_list = []
    lines = markdown_content.split('\n')

    # (marker_char, marker_length) of the fence that opened the current code
    # block, or None while outside any fenced block.
    open_fence = None

    for line_num, line in enumerate(lines, 1):
        stripped_line = line.strip()
        if not stripped_line:
            continue

        fence_match = fence_re.match(stripped_line)

        if open_fence is not None:
            # Inside a code block: the only line that matters is the closer,
            # which must use the same character, be at least as long as the
            # opener, and carry no trailing text.
            if fence_match:
                marker, trailing = fence_match.group(1), fence_match.group(2)
                fence_char, fence_len = open_fence
                if (
                    marker[0] == fence_char
                    and len(marker) >= fence_len
                    and not trailing.strip()
                ):
                    open_fence = None
            continue

        if fence_match:
            marker, trailing = fence_match.group(1), fence_match.group(2)
            # A backtick run followed by more backticks on the same line is
            # an inline code span (e.g. ```code```), not a fence opener.
            if not (marker[0] == '`' and '`' in trailing):
                open_fence = (marker[0], len(marker))
                continue

        header_match = header_re.match(stripped_line)
        if header_match:
            node_list.append({
                'node_title': header_match.group(2).strip(),
                'line_num': line_num,
            })

    return node_list, lines
| 60 | |
| 61 | |
| 62 | def extract_node_text_content(node_list, markdown_lines): |