hub / github.com/idank/explainshell / LLMExtractor

Class LLMExtractor

explainshell/extraction/llm/extractor.py:188–568 · view source on GitHub ↗

LLM-based option extractor. Implements the base ``Extractor`` protocol via ``extract()``. Also satisfies ``BatchExtractor`` via ``prepare()``, ``finalize()``, and ``batch_provider``.

Source from the content-addressed store, hash-verified

186
187
188	class LLMExtractor:
189	"""LLM-based option extractor.
190
191	Implements the base ``Extractor`` protocol via ``extract()``.
192	Also satisfies ``BatchExtractor`` via ``prepare()``, ``finalize()``,
193	and ``batch_provider``.
194	"""
195
def __init__(self, config: ExtractorConfig) -> None:
    """Build an extractor from *config*.

    Copies the model name, run directory, repo root and debug flag off
    ``config``, creates the provider for the model, and tries to create a
    batch provider for the same model.
    """
    # A falsy ``config.model`` is normalised to the empty string.
    model_name = config.model or ""
    self._model = model_name
    self._run_dir = config.run_dir
    self._repo_root = config.repo_root
    self._debug = config.debug
    self.provider: LLMProvider = make_provider(model_name)
    try:
        self.batch_provider: BatchProvider = make_batch_provider(model_name)
    except ValueError:
        # The model doesn't support batch. ``batch_provider`` is left
        # unset on purpose; it is accessed only via the --batch flag.
        pass
    # Set by cancel() and checked by extract() before each chunk.
    self._cancelled = threading.Event()
207
def cancel(self) -> None:
    """Request that in-progress extract() calls stop.

    Sets the internal cancellation event. An LLM request whose HTTP call
    is already in flight is not aborted; extract() checks the event before
    each chunk and raises instead of submitting the next one.
    """
    stop_event = self._cancelled
    stop_event.set()
213
214	def extract(self, gz_path: str) -> ExtractionResult:
215	"""Full extraction pipeline: prepare → LLM calls → finalize."""
216	prepared = self.prepare(gz_path)
217	basename = prepared.basename
218	n_chunks = prepared.n_chunks
219
220	logger.info(
221	"%s: %d chars (%d numbered), %d chunk(s)",
222	basename,
223	prepared.plain_text_len,
224	len(prepared.numbered_text),
225	n_chunks,
226	)
227
228	stats = ExtractionStats(
229	chunks=n_chunks,
230	plain_text_len=prepared.plain_text_len,
231	)
232
233	all_chunk_data: list[ChunkResult] = []
234	t0 = time.monotonic()
235
236	for i, user_content in enumerate(prepared.requests):
237	if self._cancelled.is_set():
238	raise ExtractionError("cancelled", reason_class=FailureReason.CANCELLED)
239
240	chunk_label = (
241	f"chunk {i + 1}/{n_chunks}" if n_chunks > 1 else "single chunk"
242	)
243	logger.info(
244	"%s: calling LLM (%s, %d chars)...",
245	basename,

Callers 4

make_extractor · Function · 0.90

test_batch_mode_raises_for_non_batch_llm_model · Method · 0.90

_make_mock_extractor · Function · 0.90

test_real_llm_echo_manpage · Function · 0.90

Calls

no outgoing calls

Tested by 3

test_batch_mode_raises_for_non_batch_llm_model · Method · 0.72

_make_mock_extractor · Function · 0.72

test_real_llm_echo_manpage · Function · 0.72