(page_list, mode=None, toc_content=None, toc_page_list=None, start_index=1, opt=None, logger=None)
| 957 | |
| 958 | ################### main process ######################################################### |
async def meta_processor(page_list, mode=None, toc_content=None, toc_page_list=None, start_index=1, opt=None, logger=None):
    """Build a TOC with physical page indices, verify it, and fall back if it is poor.

    The extraction strategy is chosen by ``mode``:

    * ``'process_toc_with_page_numbers'`` -> ``process_toc_with_page_numbers``
    * ``'process_toc_no_page_numbers'``   -> ``process_toc_no_page_numbers``
    * anything else (including ``None``)  -> ``process_no_toc``

    The result is filtered, range-validated and checked with ``verify_toc``.
    Depending on the reported accuracy the function returns the TOC, repairs
    it, or recursively retries with the next simpler mode.

    Args:
        page_list: Pages of the document; only ``len(page_list)`` is used
            directly here, the list itself is forwarded to the helpers.
        mode: Name of the extraction strategy (see above).
        toc_content: Forwarded to the two TOC-based strategies.
        toc_page_list: Forwarded to the two TOC-based strategies.
        start_index: Forwarded to the helpers as ``start_index``.
        opt: Options object. ``opt.model`` is always read and
            ``opt.toc_check_page_num`` is read in
            ``'process_toc_with_page_numbers'`` mode, so despite the ``None``
            default a real object must be supplied.
        logger: Object exposing ``info(...)``; it is called unconditionally,
            so despite the ``None`` default a real logger must be supplied.

    Returns:
        The list of TOC items (each with a non-``None`` ``physical_index``)
        produced by the first strategy that verifies well enough.

    Raises:
        Exception: ``'Processing failed'`` when the last-resort strategy
            (``process_no_toc``) also fails verification.
    """
    print(mode)
    print(f'start_index: {start_index}')

    if mode == 'process_toc_with_page_numbers':
        toc_with_page_number = process_toc_with_page_numbers(
            toc_content,
            toc_page_list,
            page_list,
            toc_check_page_num=opt.toc_check_page_num,
            model=opt.model,
            logger=logger,
        )
    elif mode == 'process_toc_no_page_numbers':
        toc_with_page_number = process_toc_no_page_numbers(
            toc_content, toc_page_list, page_list, model=opt.model, logger=logger
        )
    else:
        toc_with_page_number = process_no_toc(
            page_list, start_index=start_index, model=opt.model, logger=logger
        )

    # Entries without a resolved physical page cannot be verified; drop them.
    toc_with_page_number = [item for item in toc_with_page_number if item.get('physical_index') is not None]

    toc_with_page_number = validate_and_truncate_physical_indices(
        toc_with_page_number,
        len(page_list),
        start_index=start_index,
        logger=logger
    )

    accuracy, incorrect_results = await verify_toc(page_list, toc_with_page_number, start_index=start_index, model=opt.model)

    # Log the mode that actually ran. This function recurses into fallback
    # modes, so a hard-coded mode name would mislabel those passes.
    logger.info({
        'mode': mode,
        'accuracy': accuracy,
        'incorrect_results': incorrect_results
    })

    if accuracy == 1.0 and len(incorrect_results) == 0:
        return toc_with_page_number

    if accuracy > 0.6 and len(incorrect_results) > 0:
        # Mostly right: repair the flagged entries. The list of entries still
        # incorrect after the retries is returned by the helper but is not
        # inspected here.
        toc_with_page_number, incorrect_results = await fix_incorrect_toc_with_retries(
            toc_with_page_number,
            page_list,
            incorrect_results,
            start_index=start_index,
            max_attempts=3,
            model=opt.model,
            logger=logger,
        )
        return toc_with_page_number

    # Verification failed: step down to the next simpler strategy.
    if mode == 'process_toc_with_page_numbers':
        return await meta_processor(page_list, mode='process_toc_no_page_numbers', toc_content=toc_content, toc_page_list=toc_page_list, start_index=start_index, opt=opt, logger=logger)
    elif mode == 'process_toc_no_page_numbers':
        return await meta_processor(page_list, mode='process_no_toc', start_index=start_index, opt=opt, logger=logger)
    else:
        raise Exception('Processing failed')
| 998 | |
| 999 | |
| 1000 | async def process_large_node_recursively(node, page_list, opt=None, logger=None): |
no test coverage detected