Function extract_nodes_from_markdown

pageindex/page_index_md.py:32–59 · view source on GitHub ↗

(markdown_content)

Source from the content-addressed store, hash-verified

30
31
def extract_nodes_from_markdown(markdown_content):
    """Collect the ATX headers (``#`` to ``######``) found in a Markdown string.

    Lines are examined after stripping surrounding whitespace, so indented
    headers are recognised too. Header-looking lines inside fenced code
    blocks are ignored. Fences may be written with backticks or tildes; a
    block is closed only by a fence of the same character that is at least
    as long as the opening fence and carries no trailing text.

    Args:
        markdown_content: The Markdown document as a single string.

    Returns:
        A ``(node_list, lines)`` tuple. ``node_list`` holds one dict per
        header, in document order, with ``'node_title'`` (the header text
        without the leading ``#`` marks) and ``'line_num'`` (1-based index
        into ``lines``). ``lines`` is ``markdown_content.split('\\n')``.
    """
    header_pattern = re.compile(r'^(#{1,6})\s+(.+)$')
    # Group 1 is the fence run, group 2 is whatever follows it on the line.
    fence_pattern = re.compile(r'^(`{3,}|~{3,})(.*)$')
    node_list = []

    lines = markdown_content.split('\n')
    # (fence character, run length) of the fence that opened the current
    # code block, or None when we are outside any code block.
    open_fence = None

    for line_num, line in enumerate(lines, 1):
        stripped_line = line.strip()

        # Skip empty lines
        if not stripped_line:
            continue

        fence_match = fence_pattern.match(stripped_line)

        if open_fence is not None:
            # Inside a code block nothing is a header; only look for the
            # matching closing fence.
            if fence_match:
                marker, trailing = fence_match.groups()
                fence_char, fence_len = open_fence
                if (
                    marker[0] == fence_char
                    and len(marker) >= fence_len
                    and not trailing.strip()
                ):
                    open_fence = None
            continue

        if fence_match:
            marker, info = fence_match.groups()
            # A backtick run followed by more backticks on the same line is
            # inline code (e.g. ```x``` text), not an opening fence.
            if not (marker[0] == '`' and '`' in info):
                open_fence = (marker[0], len(marker))
                continue

        match = header_pattern.match(stripped_line)
        if match:
            title = match.group(2).strip()
            node_list.append({'node_title': title, 'line_num': line_num})

    return node_list, lines
60
61
62	def extract_node_text_content(node_list, markdown_lines):

md_to_treeFunction · 0.85

no outgoing calls

no test coverage detected