MCPcopy
hub / github.com/VectifyAI/PageIndex / tree_parser

Function tree_parser

pageindex/page_index.py:1029–1063  ·  view source on GitHub ↗
(page_list, opt, doc=None, logger=None)

Source from the content-addressed store, hash-verified

1027 return node
1028
async def tree_parser(page_list, opt, doc=None, logger=None):
    """Build the table-of-contents tree for a document's pages.

    The result of ``check_toc`` decides which ``meta_processor`` mode is
    used: ``'process_toc_with_page_numbers'`` when a non-blank TOC was found
    and it reports page indices, ``'process_no_toc'`` otherwise. The flat
    item list is then passed through ``add_preface_if_needed`` and
    ``check_title_appearance_in_start_concurrent``, items without a
    ``physical_index`` are dropped, and ``post_processing`` turns the
    remainder into a tree.

    Args:
        page_list: The document's pages. Forwarded to every helper; its
            length is passed to ``post_processing``.
        opt: Options object. ``opt.model`` is read here and ``opt`` itself
            is forwarded to the helpers.
        doc: Not used by this function; kept so existing callers that pass
            it keep working.
        logger: Optional logger. When ``None``, the TOC-detection result is
            not logged here. The value is forwarded unchanged to the
            helpers.
            NOTE(review): confirm the helpers also tolerate ``logger=None``.

    Returns:
        The tree produced by ``post_processing``, after
        ``process_large_node_recursively`` has been awaited for each of its
        top-level nodes.
    """
    check_toc_result = check_toc(page_list, opt)
    # ``logger`` defaults to None, so guard the call instead of raising
    # AttributeError for callers that rely on the default.
    if logger is not None:
        logger.info(check_toc_result)

    toc_content = check_toc_result.get("toc_content")
    # Use .get for both keys so a result without 'page_index_given_in_toc'
    # selects the no-TOC path rather than raising KeyError.
    has_paged_toc = (
        bool(toc_content and toc_content.strip())
        and check_toc_result.get("page_index_given_in_toc") == "yes"
    )

    if has_paged_toc:
        toc_with_page_number = await meta_processor(
            page_list,
            mode='process_toc_with_page_numbers',
            start_index=1,
            toc_content=toc_content,
            toc_page_list=check_toc_result['toc_page_list'],
            opt=opt,
            logger=logger,
        )
    else:
        toc_with_page_number = await meta_processor(
            page_list,
            mode='process_no_toc',
            start_index=1,
            opt=opt,
            logger=logger,
        )

    toc_with_page_number = add_preface_if_needed(toc_with_page_number)
    toc_with_page_number = await check_title_appearance_in_start_concurrent(
        toc_with_page_number,
        page_list,
        model=opt.model,
        logger=logger,
    )

    # Filter out items with None physical_index before post-processing.
    valid_toc_items = [
        item
        for item in toc_with_page_number
        if item.get('physical_index') is not None
    ]

    toc_tree = post_processing(valid_toc_items, len(page_list))

    # The gathered results are discarded and ``toc_tree`` is returned, so any
    # effect on the tree presumably happens in place on each node.
    await asyncio.gather(
        *(
            process_large_node_recursively(node, page_list, opt, logger=logger)
            for node in toc_tree
        )
    )

    return toc_tree
1064
1065
1066def page_index_main(doc, opt=None):

Callers 1

page_index_builder — Function · 0.85

Calls 7

check_toc — Function · 0.85
meta_processor — Function · 0.85
add_preface_if_needed — Function · 0.85
post_processing — Function · 0.85
info — Method · 0.80

Tested by

no test coverage detected