Extract clean text for LLM/Search usage.
(soup: BeautifulSoup)
| 87 | |
| 88 | |
def extract_plain_text(soup: BeautifulSoup) -> str:
    """Return the text of ``soup`` as one whitespace-normalized string.

    The text is pulled out with ``get_text`` using a single space as the
    separator, then every run of whitespace is reduced to one space and
    leading/trailing whitespace is dropped. Intended for LLM/search input.
    """
    raw = soup.get_text(separator=' ')
    # split() with no arguments breaks on any whitespace run and discards
    # empty pieces, so re-joining yields single-spaced, trimmed text.
    words = raw.split()
    return ' '.join(words)
| 94 | |
| 95 | |
| 96 | def parse_toc_recursive(toc_list, depth=0) -> List[TOCEntry]: |