Remove blacklisted top-level sections from manpage text. Returns (filtered_text, removal_counts).
(text: str)
| 112 | |
| 113 | |
def filter_sections(text: str) -> tuple[str, dict[str, int]]:
    """Drop blacklisted top-level sections from manpage text.

    The text is split with ``_split_sections``. A section is top-level when
    its first line, stripped, begins with ``"# "``. When the upper-cased
    heading of a top-level section is in ``_BLACKLISTED_SECTIONS``, that
    section and every following section up to the next top-level heading
    are discarded.

    Returns:
        A ``(filtered_text, removal_counts)`` pair: the surviving sections
        joined with newlines, and a dict mapping each removed heading
        (upper-cased) to the number of times it was removed.
    """
    survivors: list[str] = []
    dropped: dict[str, int] = {}
    dropping = False  # True while inside a blacklisted top-level section.

    for _offset, body in _split_sections(text):
        first_line = body.partition("\n")[0].strip()

        # A line starting with "# " has a space at index 1, so it can never
        # start with "## "; this single prefix test is enough to identify a
        # top-level heading.
        if first_line.startswith("# "):
            title = first_line[2:].strip().upper()
            # Every top-level heading re-decides the state: a blacklisted
            # one starts dropping, any other one stops it.
            dropping = title in _BLACKLISTED_SECTIONS
            if dropping:
                dropped[title] = dropped.get(title, 0) + 1

        if not dropping:
            survivors.append(body)

    return "\n".join(survivors), dropped
| 146 | |
| 147 | |
# Half of CHUNK_SIZE_CHARS (floor division). Presumably the upper bound on
# preamble length in characters -- NOTE(review): confirm at the use site.
_MAX_PREAMBLE_CHARS = CHUNK_SIZE_CHARS // 2