(lines: List[str], chunk_lines: int, overlap: int)
| 129 | |
| 130 | |
def sliding_chunks(
    lines: List[str],
    chunk_lines: int,
    overlap: int,
    min_chars: int = 80,
) -> Iterator[Tuple[int, List[str]]]:
    """Yield overlapping windows of *lines* together with their start index.

    Windows begin every ``max(1, chunk_lines - overlap)`` lines, so the
    step is never zero or negative even when ``overlap`` is equal to or
    larger than ``chunk_lines``. A window near the end of *lines* may hold
    fewer than ``chunk_lines`` entries because the slice is truncated.

    Args:
        lines: The lines to split.
        chunk_lines: Maximum number of lines per window.
        overlap: Number of lines shared by consecutive windows.
        min_chars: Minimum length, measured after joining the window with
            newlines and stripping surrounding whitespace, for the window
            to be yielded. Defaults to 80, the threshold that was
            previously hard-coded.

    Yields:
        ``(start, chunk)`` pairs, where ``start`` is the 0-based index in
        *lines* of the window's first line and ``chunk`` is that slice.
    """
    # Clamp to 1: range() rejects a zero step and a negative one would
    # produce no windows at all.
    step = max(1, chunk_lines - overlap)
    for start in range(0, len(lines), step):
        chunk = lines[start:start + chunk_lines]
        # Skip windows that are (nearly) blank once joined and stripped.
        if len("\n".join(chunk).strip()) >= min_chars:
            yield start, chunk
| 137 | |
| 138 | |
| 139 | def chunk_file(text: str, lang: str, chunk_lines: int, overlap: int, min_chars: int) -> Iterator[Dict[str, object]]: |