(toc_with_page_number, page_list, incorrect_results, start_index=1, model=None, logger=None)
| 758 | |
| 759 | |
| 760 | async def fix_incorrect_toc(toc_with_page_number, page_list, incorrect_results, start_index=1, model=None, logger=None): |
| 761 | print(f'start fix_incorrect_toc with {len(incorrect_results)} incorrect results') |
| 762 | incorrect_indices = {result['list_index'] for result in incorrect_results} |
| 763 | |
| 764 | end_index = len(page_list) + start_index - 1 |
| 765 | |
| 766 | incorrect_results_and_range_logs = [] |
| 767 | # Helper function to process and check a single incorrect item |
| 768 | async def process_and_check_item(incorrect_item): |
| 769 | list_index = incorrect_item['list_index'] |
| 770 | |
| 771 | # Check if list_index is valid |
| 772 | if list_index < 0 or list_index >= len(toc_with_page_number): |
| 773 | # Return an invalid result for out-of-bounds indices |
| 774 | return { |
| 775 | 'list_index': list_index, |
| 776 | 'title': incorrect_item['title'], |
| 777 | 'physical_index': incorrect_item.get('physical_index'), |
| 778 | 'is_valid': False |
| 779 | } |
| 780 | |
| 781 | # Find the previous correct item |
| 782 | prev_correct = None |
| 783 | for i in range(list_index-1, -1, -1): |
| 784 | if i not in incorrect_indices and i >= 0 and i < len(toc_with_page_number): |
| 785 | physical_index = toc_with_page_number[i].get('physical_index') |
| 786 | if physical_index is not None: |
| 787 | prev_correct = physical_index |
| 788 | break |
| 789 | # If no previous correct item found, use start_index |
| 790 | if prev_correct is None: |
| 791 | prev_correct = start_index - 1 |
| 792 | |
| 793 | # Find the next correct item |
| 794 | next_correct = None |
| 795 | for i in range(list_index+1, len(toc_with_page_number)): |
| 796 | if i not in incorrect_indices and i >= 0 and i < len(toc_with_page_number): |
| 797 | physical_index = toc_with_page_number[i].get('physical_index') |
| 798 | if physical_index is not None: |
| 799 | next_correct = physical_index |
| 800 | break |
| 801 | # If no next correct item found, use end_index |
| 802 | if next_correct is None: |
| 803 | next_correct = end_index |
| 804 | |
| 805 | incorrect_results_and_range_logs.append({ |
| 806 | 'list_index': list_index, |
| 807 | 'title': incorrect_item['title'], |
| 808 | 'prev_correct': prev_correct, |
| 809 | 'next_correct': next_correct |
| 810 | }) |
| 811 | |
| 812 | page_contents=[] |
| 813 | for page_index in range(prev_correct, next_correct+1): |
| 814 | # Add bounds checking to prevent IndexError |
| 815 | page_list_idx = page_index - start_index |
| 816 | if page_list_idx >= 0 and page_list_idx < len(page_list): |
| 817 | page_text = f"<physical_index_{page_index}>\n{page_list[page_list_idx][0]}\n<physical_index_{page_index}>\n\n" |
no test coverage detected