(text: str, lang: str, chunk_lines: int, overlap: int, min_chars: int)
| 137 | |
| 138 | |
def chunk_file(
    text: str,
    lang: str,
    chunk_lines: int,
    overlap: int,
    min_chars: int,
) -> Iterator[Dict[str, object]]:
    """Yield snippet records for *text*, preferring symbol chunks over windows.

    The text is split into lines and ``find_symbols(lines, lang)`` is asked
    for ``(start, name)`` pairs, where ``start`` is used as a 0-based index
    into the line list. One chunk is produced per symbol; only when no symbol
    chunk is emitted at all does the function fall back to the windows
    produced by ``sliding_chunks(lines, chunk_lines, overlap)``.

    Args:
        text: Full file contents.
        lang: Language identifier forwarded to ``find_symbols``.
        chunk_lines: Maximum number of lines taken for a symbol chunk; also
            forwarded to ``trim_chunk`` and ``sliding_chunks``.
        overlap: Forwarded to ``sliding_chunks`` (fallback path only).
        min_chars: Chunks whose joined, stripped code is shorter than this
            are skipped.

    Yields:
        Dicts with the keys ``kind`` (``"symbol"`` or ``"window"``),
        ``symbol`` (the symbol name, or ``None`` for windows), ``startLine``
        (``start + 1``), ``endLine`` (``start + len(chunk)``) and ``code``.
    """
    lines = text.splitlines()
    symbols = find_symbols(lines, lang)
    # Only whether *any* symbol chunk was emitted matters for the fallback.
    emitted_symbol = False

    for pos, (start, name) in enumerate(symbols):
        # A symbol's chunk stops where the next symbol starts (end of file
        # for the last one), capped at chunk_lines lines.
        # NOTE(review): presumably find_symbols returns starts in ascending
        # order -- confirm against its implementation.
        next_start = symbols[pos + 1][0] if pos + 1 < len(symbols) else len(lines)
        end = min(next_start, start + chunk_lines)
        # NOTE(review): endLine below assumes trim_chunk only drops lines
        # from the tail of the slice -- confirm.
        chunk = trim_chunk(lines[start:end], chunk_lines)
        code = "\n".join(chunk).strip()
        if len(code) < min_chars:
            continue
        emitted_symbol = True
        yield {
            "kind": "symbol",
            "symbol": name,
            "startLine": start + 1,
            "endLine": start + len(chunk),
            "code": code,
        }

    # Fallback/window chunks keep files without obvious symbols useful, while
    # still indexing snippets rather than complete files.
    if not emitted_symbol:
        for start, chunk in sliding_chunks(lines, chunk_lines, overlap):
            code = "\n".join(chunk).strip()
            if len(code) >= min_chars:
                yield {
                    "kind": "window",
                    "symbol": None,
                    "startLine": start + 1,
                    "endLine": start + len(chunk),
                    "code": code,
                }
| 169 | |
| 170 | |
| 171 | def snippet_id(repo: str, rel: str, start: int, code: str) -> str: |
no test coverage detected