hub / github.com/idank/explainshell / extract

Method extract

explainshell/extraction/llm/extractor.py:214–270 · view source on GitHub ↗

Full extraction pipeline: prepare → LLM calls → finalize.

(self, gz_path: str)

Source from the content-addressed store, hash-verified

212	self._cancelled.set()
213
def extract(self, gz_path: str) -> ExtractionResult:
    """Run the complete extraction for one input file.

    The input is first prepared via ``self.prepare``; every prepared
    request is then sent to the LLM in order, and the collected chunk
    results are handed to ``self._finalize`` together with the
    accumulated statistics.

    Args:
        gz_path: Path of the input, forwarded to ``prepare``,
            ``_dump_failed_response`` and ``_finalize``.

    Returns:
        Whatever ``self._finalize`` returns for the collected chunks.

    Raises:
        ExtractionError: With ``FailureReason.CANCELLED`` when the
            cancellation flag is set before a request is issued, or
            re-raised unchanged when an LLM call fails.
    """
    prep = self.prepare(gz_path)
    name = prep.basename
    total = prep.n_chunks

    logger.info(
        "%s: %d chars (%d numbered), %d chunk(s)",
        name,
        prep.plain_text_len,
        len(prep.numbered_text),
        total,
    )

    stats = ExtractionStats(chunks=total, plain_text_len=prep.plain_text_len)
    chunk_results: list[ChunkResult] = []
    started = time.monotonic()

    for position, request_text in enumerate(prep.requests, start=1):
        # Honour a cancellation request before spending another LLM call.
        if self._cancelled.is_set():
            raise ExtractionError("cancelled", reason_class=FailureReason.CANCELLED)

        if total > 1:
            label = f"chunk {position}/{total}"
        else:
            label = "single chunk"

        logger.info(
            "%s: calling LLM (%s, %d chars)...",
            name,
            label,
            len(request_text),
        )

        try:
            result = self._call_llm(request_text)
        except ExtractionError as err:
            raw = err.raw_response
            if raw:
                # Keep the raw response for inspection; the dump helper
                # receives the zero-based chunk index.
                self._dump_failed_response(gz_path, position - 1, raw)
            raise

        usage = result.usage
        stats.input_tokens += usage.input_tokens
        stats.output_tokens += usage.output_tokens
        stats.reasoning_tokens += usage.reasoning_tokens

        option_count = len(result.data["options"])
        logger.info(
            "%s: LLM returned %d option(s) for %s",
            name,
            option_count,
            label,
        )
        chunk_results.append(result)

    # Elapsed time covers only the LLM request loop, not prepare/finalize.
    stats.elapsed_seconds = time.monotonic() - started
    return self._finalize(gz_path, prep, chunk_results, stats)
271

Callers 1

test_real_llm_echo_manpage (Function) · 0.95

Calls 6

prepare (Method) · 0.95

_call_llm (Method) · 0.95

_dump_failed_response (Method) · 0.95

_finalize (Method) · 0.95

ExtractionStats (Class) · 0.90

ExtractionError (Class) · 0.90

Tested by 1

test_real_llm_echo_manpage (Function) · 0.76