(self, request, context)
| 607 | context.set_details(f"PredictStream failed: {exc}") |
| 608 | |
async def Embedding(self, request, context):
    """Return an L2-normalised, mean-pooled embedding for ``request.Embeddings``.

    The input text is tokenised with ``self._encode_prompt``, truncated to
    ``self.max_context`` tokens, run through ``_embed_hidden`` and mean-pooled
    over the sequence axis before normalisation.

    Status codes set on ``context``:
        FAILED_PRECONDITION: no LLM model or tokenizer is loaded.
        INVALID_ARGUMENT: ``request.Embeddings`` is empty.
        INTERNAL: any exception raised while computing the embedding.

    Returns:
        ``backend_pb2.EmbeddingResult`` carrying the embedding as a list of
        floats; an empty result on any error, or when the text encodes to
        zero tokens.
    """
    if self.llm_model is None or self.llm_tokenizer is None:
        context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
        context.set_details("No model loaded")
        return backend_pb2.EmbeddingResult()

    try:
        text = request.Embeddings
        if not text:
            context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
            context.set_details("Embeddings field is empty")
            return backend_pb2.EmbeddingResult()

        # Imported lazily, as in the original, so the module loads without
        # these packages being importable up front.
        from tinygrad import Tensor, dtypes
        from vendor.appsllm_adapter import _embed_hidden

        ids = self._encode_prompt(text)
        if not ids:
            return backend_pb2.EmbeddingResult(embeddings=[])

        # Clamp to context window — truncate long inputs rather than blow up.
        ids = ids[: self.max_context]
        tokens = Tensor([ids])

        # Expected shape (1, seqlen, dim) per the adapter's contract.
        hidden = _embed_hidden(self.llm_model, tokens)

        # Upcast BEFORE pooling/normalising. If the model runs in half
        # precision, the 1e-12 epsilon below is not representable (it would
        # round to zero, defeating the divide-by-zero guard) and square()
        # can overflow. In float32 both are safe.
        hidden = hidden.cast(dtypes.float32)

        # Mean pool over the sequence axis -> (dim,)
        pooled = hidden.mean(axis=1).squeeze(0)

        # L2 normalise; epsilon keeps an all-zero vector from dividing by zero.
        norm = pooled.square().sum().sqrt()
        normalized = pooled / (norm + 1e-12)
        vec = normalized.tolist()

        return backend_pb2.EmbeddingResult(embeddings=[float(x) for x in vec])
    except Exception as exc:
        # RPC boundary: report the failure to the client instead of letting
        # the exception escape the handler, but keep the traceback in logs.
        import traceback
        traceback.print_exc()
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(f"Embedding failed: {exc}")
        return backend_pb2.EmbeddingResult()
| 648 | |
| 649 | async def GenerateImage(self, request, context): |
| 650 | if self.sd_model is None: |
nothing calls this directly
no test coverage detected