(
self,
*,
kind: str,
call_fn: Callable[[], Any],
request_view: LLMRequestView,
model: str | None,
response_builder: Callable[[Any], LLMResponseView],
)
| 385 | ) |
| 386 | |
async def _invoke(
    self,
    *,
    kind: str,
    call_fn: Callable[[], Any],
    request_view: LLMRequestView,
    model: str | None,
    response_builder: Callable[[Any], LLMResponseView],
) -> Any:
    """Execute ``call_fn`` wrapped by the before / after / on-error runners.

    Steps, in order:

    1. Build a call context for ``model`` and take a snapshot of the registry.
    2. Await ``_run_before`` with the snapshot's ``before`` entries.
    3. Call ``call_fn()``; if what it returns is awaitable, await it.
    4. If an ``Exception`` is raised, build an ``LLMUsage`` carrying only the
       elapsed latency and ``status="error"``, await ``_run_on_error`` with
       the snapshot's ``on_error`` entries, and re-raise the same exception.
    5. Otherwise build the response view and a ``status="success"``
       ``LLMUsage``, await ``_run_after`` with the snapshot's ``after``
       entries, and return the result.

    Args:
        kind: Forwarded to ``_extract_usage_from_raw_response``.
        call_fn: Zero-argument callable producing the result, or an
            awaitable that resolves to it. A two-element tuple result is
            unpacked as ``(pure_response, raw_response)``.
        request_view: Passed through to every runner.
        model: Passed to ``_build_call_context``.
        response_builder: Converts the pure result into the response view
            handed to ``_run_after``.

    Returns:
        The pure result: the first element of a two-element tuple result,
        otherwise the result itself.

    Raises:
        Exception: Whatever ``call_fn`` (or awaiting its result) raised,
            re-raised after ``_run_on_error`` has been awaited.
    """
    context = self._build_call_context(model)
    hooks = self._registry.snapshot()
    await self._run_before(hooks.before, context, request_view)

    started = time.perf_counter()
    try:
        outcome = call_fn()
        if inspect.isawaitable(outcome):
            outcome = await outcome
    except Exception as exc:
        failure_usage = LLMUsage(
            latency_ms=(time.perf_counter() - started) * 1000,
            status="error",
        )
        await self._run_on_error(hooks.on_error, context, request_view, exc, failure_usage)
        raise

    # Only reached when the call succeeded; the except branch always re-raises.
    elapsed_ms = (time.perf_counter() - started) * 1000

    # A two-element tuple is treated as (pure_response, raw_response).
    if isinstance(outcome, tuple) and len(outcome) == 2:
        payload, raw = outcome
    else:
        payload, raw = outcome, None

    view = response_builder(payload)

    # Token usage is read from the raw response; fields that are absent stay None.
    stats = _extract_usage_from_raw_response(kind=kind, raw_response=raw)
    reported = {
        field: stats.get(field)
        for field in (
            "input_tokens",
            "output_tokens",
            "total_tokens",
            "cached_input_tokens",
            "reasoning_tokens",
            "finish_reason",
            "tokens_breakdown",
        )
    }
    success_usage = LLMUsage(latency_ms=elapsed_ms, status="success", **reported)

    await self._run_after(hooks.after, context, request_view, view, success_usage)
    return payload
| 436 | |
| 437 | def _build_call_context(self, model: str | None) -> LLMCallContext: |
| 438 | request_id = uuid.uuid4().hex |
no test coverage detected