MCPcopy
hub / github.com/idank/explainshell / extract

Method extract

explainshell/extraction/llm/extractor.py:214–270  ·  view source on GitHub ↗

Full extraction pipeline: prepare → LLM calls → finalize.

(self, gz_path: str)

Source from the content-addressed store, hash-verified

212 self._cancelled.set()
213
def extract(self, gz_path: str) -> ExtractionResult:
    """Run the full extraction pipeline: prepare → LLM calls → finalize.

    The input is prepared via ``self.prepare``, then each prepared request
    is sent to ``self._call_llm`` in order. Token usage from every chunk is
    summed into an ``ExtractionStats`` object, and the time spent in the
    request loop is stored in ``stats.elapsed_seconds`` before the chunk
    results are handed to ``self._finalize``.

    Args:
        gz_path: Path passed to ``self.prepare``; also forwarded to
            ``self._dump_failed_response`` and ``self._finalize``.

    Returns:
        Whatever ``self._finalize`` returns for the collected chunk results.

    Raises:
        ExtractionError: With ``FailureReason.CANCELLED`` when the
            cancellation flag is set before a chunk is sent, or re-raised
            unchanged when ``self._call_llm`` fails.
    """
    prepared = self.prepare(gz_path)
    basename = prepared.basename
    total_chunks = prepared.n_chunks
    multi_chunk = total_chunks > 1

    logger.info(
        "%s: %d chars (%d numbered), %d chunk(s)",
        basename,
        prepared.plain_text_len,
        len(prepared.numbered_text),
        total_chunks,
    )

    stats = ExtractionStats(
        chunks=total_chunks,
        plain_text_len=prepared.plain_text_len,
    )

    chunk_results: list[ChunkResult] = []
    started_at = time.monotonic()

    for index, payload in enumerate(prepared.requests):
        # Cancellation is only honoured between requests, never mid-call.
        if self._cancelled.is_set():
            raise ExtractionError("cancelled", reason_class=FailureReason.CANCELLED)

        if multi_chunk:
            label = f"chunk {index + 1}/{total_chunks}"
        else:
            label = "single chunk"
        logger.info(
            "%s: calling LLM (%s, %d chars)...",
            basename,
            label,
            len(payload),
        )

        try:
            result = self._call_llm(payload)
        except ExtractionError as err:
            # Keep the raw model output (when there is any) for later
            # inspection, then let the original error propagate.
            if err.raw_response:
                self._dump_failed_response(gz_path, index, err.raw_response)
            raise

        usage = result.usage
        stats.input_tokens += usage.input_tokens
        stats.output_tokens += usage.output_tokens
        stats.reasoning_tokens += usage.reasoning_tokens

        option_count = len(result.data["options"])
        logger.info(
            "%s: LLM returned %d option(s) for %s",
            basename,
            option_count,
            label,
        )
        chunk_results.append(result)

    stats.elapsed_seconds = time.monotonic() - started_at
    return self._finalize(gz_path, prepared, chunk_results, stats)
271

Callers 1

Calls 6

prepare · Method · 0.95
_call_llm · Method · 0.95
_dump_failed_response · Method · 0.95
_finalize · Method · 0.95
ExtractionStats · Class · 0.90
ExtractionError · Class · 0.90

Tested by 1