(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
)
| 497 | ) |
| 498 | |
async def _astream(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
    """Asynchronously stream the model's reply as generation chunks.

    The incoming messages are converted with ``self._convert_messages``,
    the rate limiter is awaited, and the request is then streamed through
    a ``LiteLLMTransport``. Every parsed piece is folded into a
    ``ChatGenerationResult``; whenever that yields a truthy
    ``"response_delta"``, it is emitted wrapped in an ``AIMessageChunk``.

    Args:
        messages: Conversation to send to the model.
        stop: Optional stop sequences, forwarded to the transport as-is.
        run_manager: Accepted for interface compatibility; this method
            does not reference it.
        **kwargs: Per-call options, combined with ``self.kwargs`` by
            ``_merge_litellm_call_kwargs`` before being handed to the
            transport.

    Yields:
        One ``ChatGenerationChunk`` per non-empty response delta.
    """
    configure_litellm()
    converted = self._convert_messages(messages)

    # Apply rate limiting if configured; the limiter receives the
    # stringified converted messages.
    await apply_rate_limiter(self.a0_model_conf, str(converted))

    accumulator = ChatGenerationResult()
    stream = LiteLLMTransport(
        model=self.model_name,
        messages=converted,
        kwargs=_merge_litellm_call_kwargs(self.kwargs, kwargs),
        stop=stop,
    ).astream()

    async for piece in stream:
        delta = accumulator.add_chunk(piece)["response_delta"]
        if not delta:
            # Nothing new to surface for this piece; emit no chunk.
            continue
        yield ChatGenerationChunk(message=AIMessageChunk(content=delta))
| 526 | |
| 527 | async def unified_call( |
| 528 | self, |
Nothing calls this directly.
No test coverage detected.