(page_list, toc_page_list, model)
| 220 | return json_content['page_index_given_in_toc'] |
| 221 | |
def toc_extractor(page_list, toc_page_list, model):
    """Build the table-of-contents text and check whether it carries page numbers.

    The text of every page referenced by ``toc_page_list`` is concatenated in
    the given order, dot leaders are rewritten to ``': '``, and the result is
    handed to ``detect_page_index``.

    Args:
        page_list: Indexable collection of pages; element ``[0]`` of each
            entry is used as that page's text (assumes it is a ``str`` --
            TODO confirm against the caller).
        toc_page_list: Iterable of indices into ``page_list`` selecting the
            pages that make up the table of contents.
        model: Forwarded unchanged to ``detect_page_index`` as ``model=``.

    Returns:
        A dict with two keys: ``"toc_content"`` (the normalised text) and
        ``"page_index_given_in_toc"`` (whatever ``detect_page_index``
        returned for that text).
    """
    # Join before normalising so the substitutions see the text as one string.
    raw_toc = "".join(page_list[idx][0] for idx in toc_page_list)

    # Runs of five or more consecutive dots become a single ': '.
    without_dot_runs = re.sub(r'\.{5,}', ': ', raw_toc)
    # Same treatment for dots separated by spaces ('. . . . . .').
    toc_content = re.sub(r'(?:\. ){5,}\.?', ': ', without_dot_runs)

    return {
        "toc_content": toc_content,
        "page_index_given_in_toc": detect_page_index(toc_content, model=model),
    }
| 239 | |
| 240 | |
| 241 |
no test coverage detected