()
| 1080 | logger.info({'total_token': sum([page[1] for page in page_list])}) |
| 1081 | |
async def page_index_builder():
    """Build the document tree and wrap it in the result dictionary.

    Reads ``page_list``, ``opt``, ``doc`` and ``logger`` from the
    enclosing scope.  The ``opt.if_add_*`` switches are compared against
    the literal strings ``'yes'`` / ``'no'``.

    Returns:
        A dict with ``'doc_name'`` and ``'structure'``; a
        ``'doc_description'`` entry sits between them when a description
        was generated.

    NOTE(review): the nesting below was reconstructed from a listing that
    had lost its indentation.  The description step is placed inside the
    summary branch -- confirm against the canonical file.
    """
    tree = await tree_parser(page_list, opt, doc=doc, logger=logger)

    if opt.if_add_node_id == 'yes':
        write_node_id(tree)
    if opt.if_add_node_text == 'yes':
        add_node_text(tree, page_list)

    # Optional entries that belong between 'doc_name' and 'structure'.
    optional_fields = {}

    if opt.if_add_node_summary == 'yes':
        # When node text is switched off it is attached only for the
        # duration of the summary pass and stripped again afterwards.
        text_is_temporary = opt.if_add_node_text == 'no'
        if text_is_temporary:
            add_node_text(tree, page_list)
        await generate_summaries_for_structure(tree, model=opt.model)
        if text_is_temporary:
            remove_structure_text(tree)

        if opt.if_add_doc_description == 'yes':
            # Create a clean structure without unnecessary fields for
            # description generation.
            slim_tree = create_clean_structure_for_description(tree)
            optional_fields['doc_description'] = generate_doc_description(
                slim_tree, model=opt.model
            )

    tree = format_structure(
        tree,
        order=['title', 'node_id', 'start_index', 'end_index', 'summary', 'text', 'nodes'],
    )

    # Insertion order fixes the key order: doc_name, [doc_description], structure.
    result = {'doc_name': get_pdf_name(doc)}
    result.update(optional_fields)
    result['structure'] = tree
    return result
| 1109 | |
| 1110 | return asyncio.run(page_index_builder()) |
| 1111 |
no test coverage detected