Reject an LLM extraction whose line spans look pathological. Raises ``ExtractionError`` when the sum of all option spans exceeds ``_MAX_COVERAGE_RATIO`` times the document extent (derived from the highest end-line in the options' own metadata). This catches the common failure mode where the LLM returns near-document-sized ranges (e.g. ``[1, 927]``) for every option instead of precise per-option spans.
(options: list[Option])
| 205 | |
| 206 | |
def sanity_check_line_spans(options: list[Option]) -> None:
    """Reject an LLM extraction whose line spans look pathological.

    Adds up the inclusive length of every option's ``meta["lines"]`` span
    and divides that total by the document extent, taken as the highest
    end-line found in those same spans. A ratio above
    ``_MAX_COVERAGE_RATIO`` means the spans overlap far more than precise
    per-option ranges would, which is the common failure mode where the
    LLM returns near-document-sized ranges (e.g. ``[1, 927]``) for every
    option.

    Options are ignored when ``meta`` is empty or when ``"lines"`` is not
    a two-element list of numbers. If no usable span is found, or the
    highest end-line is below 1, the check passes silently.

    Args:
        options: Extracted options; only each option's ``meta`` attribute
            is read.

    Raises:
        ExtractionError: With
            ``reason_class=FailureReason.LINE_SPAN_COVERAGE`` when the
            coverage ratio exceeds ``_MAX_COVERAGE_RATIO``.
    """
    if not options:
        return

    total_span = 0
    max_end = 0
    for opt in options:
        lines = (opt.meta or {}).get("lines")
        if not isinstance(lines, list) or len(lines) != 2:
            continue

        start, end = lines
        # The metadata comes from model output, so the bounds may be None or
        # strings. Skip those like any other malformed entry rather than
        # letting the arithmetic below raise a bare TypeError.
        if not isinstance(start, (int, float)) or not isinstance(end, (int, float)):
            continue

        # Inclusive span length; a reversed span contributes nothing.
        total_span += max(end - start + 1, 0)
        max_end = max(max_end, end)

    # No usable end-line means there is no document extent to divide by.
    if max_end < 1:
        return

    coverage = total_span / max_end

    if coverage > _MAX_COVERAGE_RATIO:
        raise ExtractionError(
            f"line-span coverage {coverage:.1f}x exceeds {_MAX_COVERAGE_RATIO}x limit "
            f"({len(options)} options, {max_end} lines) "
            f"(try a stronger model?)",
            reason_class=FailureReason.LINE_SPAN_COVERAGE,
        )
| 241 | |
| 242 | |
| 243 | def postprocess( |