MCPcopy
hub / github.com/VectifyAI/PageIndex / meta_processor

Function meta_processor

pageindex/page_index.py:959–997  ·  view source on GitHub ↗
(page_list, mode=None, toc_content=None, toc_page_list=None, start_index=1, opt=None, logger=None)

Source from the content-addressed store, hash-verified

957
958################### main process #########################################################
async def meta_processor(page_list, mode=None, toc_content=None, toc_page_list=None, start_index=1, opt=None, logger=None):
    """Build a TOC with physical page indices, verify it, and fall back if needed.

    The extraction strategy is selected by ``mode``:

    * ``'process_toc_with_page_numbers'`` -> ``process_toc_with_page_numbers``
    * ``'process_toc_no_page_numbers'``   -> ``process_toc_no_page_numbers``
    * anything else (including ``None``)  -> ``process_no_toc``

    Entries without a ``physical_index`` are dropped, the remainder is passed
    through ``validate_and_truncate_physical_indices`` and then checked with
    ``verify_toc``. Depending on the reported accuracy the result is returned
    as is, repaired via ``fix_incorrect_toc_with_retries``, or the function
    recurses with the next, less demanding mode.

    Args:
        page_list: Pages of the document; its length is used as the page count.
        mode: Name of the extraction strategy (see above).
        toc_content: TOC content forwarded to the TOC-based strategies.
        toc_page_list: TOC page list forwarded to the TOC-based strategies.
        start_index: Index offset forwarded to the helpers.
        opt: Options object. Required in practice despite the ``None``
            default: ``opt.model`` is always read, and
            ``opt.toc_check_page_num`` is read in the with-page-numbers mode.
        logger: Logger. Required in practice despite the ``None`` default:
            ``logger.info`` is called unconditionally.

    Returns:
        The list of TOC items, each with a non-``None`` ``physical_index``.

    Raises:
        Exception: ``'Processing failed'`` when the last fallback strategy
            also fails verification.
    """
    print(mode)
    print(f'start_index: {start_index}')

    if mode == 'process_toc_with_page_numbers':
        toc_with_page_number = process_toc_with_page_numbers(
            toc_content,
            toc_page_list,
            page_list,
            toc_check_page_num=opt.toc_check_page_num,
            model=opt.model,
            logger=logger,
        )
    elif mode == 'process_toc_no_page_numbers':
        toc_with_page_number = process_toc_no_page_numbers(
            toc_content,
            toc_page_list,
            page_list,
            model=opt.model,
            logger=logger,
        )
    else:
        toc_with_page_number = process_no_toc(
            page_list,
            start_index=start_index,
            model=opt.model,
            logger=logger,
        )

    # Items that could not be mapped to a page are dropped before validation.
    toc_with_page_number = [
        item for item in toc_with_page_number
        if item.get('physical_index') is not None
    ]

    toc_with_page_number = validate_and_truncate_physical_indices(
        toc_with_page_number,
        len(page_list),
        start_index=start_index,
        logger=logger,
    )

    accuracy, incorrect_results = await verify_toc(
        page_list,
        toc_with_page_number,
        start_index=start_index,
        model=opt.model,
    )

    # Log the mode that actually ran. The previous hard-coded
    # 'process_toc_with_page_numbers' mislabelled every fallback pass.
    logger.info({
        'mode': mode,
        'accuracy': accuracy,
        'incorrect_results': incorrect_results,
    })

    # Fully verified: nothing to repair.
    if accuracy == 1.0 and len(incorrect_results) == 0:
        return toc_with_page_number

    # Mostly right: attempt to repair the flagged entries in place.
    if accuracy > 0.6 and len(incorrect_results) > 0:
        toc_with_page_number, incorrect_results = await fix_incorrect_toc_with_retries(
            toc_with_page_number,
            page_list,
            incorrect_results,
            start_index=start_index,
            max_attempts=3,
            model=opt.model,
            logger=logger,
        )
        return toc_with_page_number

    # Verification failed: step down to the next strategy, or give up.
    if mode == 'process_toc_with_page_numbers':
        return await meta_processor(
            page_list,
            mode='process_toc_no_page_numbers',
            toc_content=toc_content,
            toc_page_list=toc_page_list,
            start_index=start_index,
            opt=opt,
            logger=logger,
        )
    if mode == 'process_toc_no_page_numbers':
        return await meta_processor(
            page_list,
            mode='process_no_toc',
            start_index=start_index,
            opt=opt,
            logger=logger,
        )
    raise Exception('Processing failed')
998
999
1000async def process_large_node_recursively(node, page_list, opt=None, logger=None):

Callers 2

tree_parser · Function · 0.85

Calls 7

process_no_toc · Function · 0.85
verify_toc · Function · 0.85
info · Method · 0.80

Tested by

no test coverage detected