MCPcopy Index your code
hub / github.com/idank/explainshell / LLMExtractor

Class LLMExtractor

explainshell/extraction/llm/extractor.py:188–568  ·  view source on GitHub ↗

LLM-based option extractor. Implements the base ``Extractor`` protocol via ``extract()``. Also satisfies ``BatchExtractor`` via ``prepare()``, ``finalize()``, and ``batch_provider``.

Source from the content-addressed store, hash-verified

186
187
188class LLMExtractor:
189 """LLM-based option extractor.
190
191 Implements the base ``Extractor`` protocol via ``extract()``.
192 Also satisfies ``BatchExtractor`` via ``prepare()``, ``finalize()``,
193 and ``batch_provider``.
194 """
195
def __init__(self, config: ExtractorConfig) -> None:
    """Build an extractor from *config*.

    Copies the model name, run directory, repo root and debug flag off
    *config*, creates the LLM provider for the model, and tries to create
    a batch provider for the same model.
    """
    # A falsy ``config.model`` (e.g. ``None``) is stored as the empty string.
    model_name = config.model or ""
    self._model = model_name
    self._run_dir = config.run_dir
    self._repo_root = config.repo_root
    self._debug = config.debug

    self.provider: LLMProvider = make_provider(model_name)
    try:
        self.batch_provider: BatchProvider = make_batch_provider(model_name)
    except ValueError:
        # model doesn't support batch; accessed only via --batch flag.
        # The ``batch_provider`` attribute is deliberately left unset here.
        pass

    # Event checked by extract() before each chunk; set by cancel().
    self._cancelled = threading.Event()
207
def cancel(self) -> None:
    """Ask in-progress ``extract()`` calls to stop.

    Sets the extractor's cancellation event. ``extract()`` tests that
    event before handling each chunk and raises ``ExtractionError`` with
    ``FailureReason.CANCELLED`` once it is set. An LLM request that is
    already in flight is not aborted; only the submission of the next
    chunk is prevented.
    """
    cancel_event = self._cancelled
    cancel_event.set()
213
214 def extract(self, gz_path: str) -> ExtractionResult:
215 """Full extraction pipeline: prepare → LLM calls → finalize."""
216 prepared = self.prepare(gz_path)
217 basename = prepared.basename
218 n_chunks = prepared.n_chunks
219
220 logger.info(
221 "%s: %d chars (%d numbered), %d chunk(s)",
222 basename,
223 prepared.plain_text_len,
224 len(prepared.numbered_text),
225 n_chunks,
226 )
227
228 stats = ExtractionStats(
229 chunks=n_chunks,
230 plain_text_len=prepared.plain_text_len,
231 )
232
233 all_chunk_data: list[ChunkResult] = []
234 t0 = time.monotonic()
235
236 for i, user_content in enumerate(prepared.requests):
237 if self._cancelled.is_set():
238 raise ExtractionError("cancelled", reason_class=FailureReason.CANCELLED)
239
240 chunk_label = (
241 f"chunk {i + 1}/{n_chunks}" if n_chunks > 1 else "single chunk"
242 )
243 logger.info(
244 "%s: calling LLM (%s, %d chars)...",
245 basename,

Callers 4

make_extractor · Function · 0.90
_make_mock_extractor · Function · 0.90

Calls

no outgoing calls