(md_path, if_thinning=False, min_token_threshold=None, if_add_node_summary='no', summary_token_threshold=None, model=None, if_add_doc_description='no', if_add_node_text='no', if_add_node_id='yes')
| 241 | |
| 242 | |
async def md_to_tree(
    md_path,
    if_thinning=False,
    min_token_threshold=None,
    if_add_node_summary='no',
    summary_token_threshold=None,
    model=None,
    if_add_doc_description='no',
    if_add_node_text='no',
    if_add_node_id='yes',
):
    """Read a markdown file and return its heading tree as a dict.

    The file is read as UTF-8, split into nodes, optionally thinned, turned
    into a tree, and finally reformatted into a fixed field order.

    Args:
        md_path: Path of the markdown file to read.
        if_thinning: When truthy, token counts are attached to the nodes and
            the node list is passed through ``tree_thinning_for_index``.
        min_token_threshold: Forwarded to ``tree_thinning_for_index``; only
            used when ``if_thinning`` is truthy.
        if_add_node_summary: ``'yes'`` runs
            ``generate_summaries_for_structure_md`` on the tree.
        summary_token_threshold: Forwarded to the summary generator.
        model: Forwarded to the token counting, thinning, summary and
            description helpers.
        if_add_doc_description: ``'yes'`` adds a ``'doc_description'`` entry
            to the result. Only consulted when ``if_add_node_summary`` is
            ``'yes'``.
        if_add_node_text: Controls whether ``'text'`` is part of the final
            field order. With summaries enabled, text is dropped only when
            this is exactly ``'no'``; otherwise it is kept only when this is
            exactly ``'yes'``.
        if_add_node_id: ``'yes'`` calls ``write_node_id`` on the tree.

    Returns:
        A dict with ``'doc_name'`` (file name without extension),
        ``'line_count'`` (number of newline characters plus one) and
        ``'structure'``, plus ``'doc_description'`` right after
        ``'doc_name'`` when a description was generated.
    """
    # Field order handed to format_structure; 'text' is spliced in just
    # before 'nodes' when node text has to be kept.
    base_fields = ('title', 'node_id', 'line_num', 'summary', 'prefix_summary', 'nodes')
    text_fields = base_fields[:-1] + ('text',) + base_fields[-1:]

    with open(md_path, 'r', encoding='utf-8') as md_file:
        raw_markdown = md_file.read()
    total_lines = raw_markdown.count('\n') + 1

    print("Extracting nodes from markdown...")
    heading_nodes, source_lines = extract_nodes_from_markdown(raw_markdown)

    print("Extracting text content from nodes...")
    nodes = extract_node_text_content(heading_nodes, source_lines)

    if if_thinning:
        nodes = update_node_list_with_text_token_count(nodes, model=model)
        print("Thinning nodes...")
        nodes = tree_thinning_for_index(nodes, min_token_threshold, model=model)

    print("Building tree from nodes...")
    tree = build_tree_from_nodes(nodes)

    if if_add_node_id == 'yes':
        write_node_id(tree)

    print("Formatting tree structure...")

    # Holds the optional 'doc_description' entry so that a single return
    # statement can place it between 'doc_name' and 'line_count'.
    description_entry = {}

    if if_add_node_summary == 'yes':
        # Text must be present on the nodes while summaries are generated.
        tree = format_structure(tree, order=list(text_fields))

        print("Generating summaries for each node...")
        tree = await generate_summaries_for_structure_md(
            tree,
            summary_token_threshold=summary_token_threshold,
            model=model,
        )

        if if_add_node_text == 'no':
            # Summaries are done; drop the text again since it was not requested.
            tree = format_structure(tree, order=list(base_fields))

        if if_add_doc_description == 'yes':
            print("Generating document description...")
            # The description is generated from a reduced copy of the tree.
            slim_tree = create_clean_structure_for_description(tree)
            description_entry['doc_description'] = generate_doc_description(slim_tree, model=model)
    elif if_add_node_text == 'yes':
        # No summaries: keep the node text.
        tree = format_structure(tree, order=list(text_fields))
    else:
        # No summaries and no node text.
        tree = format_structure(tree, order=list(base_fields))

    return {
        'doc_name': os.path.splitext(os.path.basename(md_path))[0],
        **description_entry,
        'line_count': total_lines,
        'structure': tree,
    }
no test coverage detected