MCPcopy Index your code
hub / github.com/mudler/LocalAI / Embedding

Method Embedding

backend/python/tinygrad/backend.py:609–647  ·  view source on GitHub ↗
(self, request, context)

Source from the content-addressed store, hash-verified

607 context.set_details(f"PredictStream failed: {exc}")
608
async def Embedding(self, request, context):
    """Return a mean-pooled, L2-normalized embedding for ``request.Embeddings``.

    The text is tokenized with ``self._encode_prompt``, truncated to
    ``self.max_context`` tokens, run through ``_embed_hidden``, averaged over
    the sequence axis and scaled to unit length.

    Args:
        request: gRPC request message; only its ``Embeddings`` field is read.
        context: gRPC servicer context used to report status code and details.

    Returns:
        A ``backend_pb2.EmbeddingResult`` whose ``embeddings`` field holds the
        vector as float32 values. The result is empty when:
          * no model/tokenizer is loaded (status ``FAILED_PRECONDITION``),
          * ``request.Embeddings`` is empty (status ``INVALID_ARGUMENT``),
          * tokenization yields no ids (no error status is set),
          * any exception is raised (status ``INTERNAL``; traceback printed).
    """
    if self.llm_model is None or self.llm_tokenizer is None:
        context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
        context.set_details("No model loaded")
        return backend_pb2.EmbeddingResult()

    try:
        text = request.Embeddings
        if not text:
            context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
            context.set_details("Embeddings field is empty")
            return backend_pb2.EmbeddingResult()

        # Imported lazily, as in the original, so the heavy dependencies are
        # only loaded when an embedding is actually requested.
        from tinygrad import Tensor, dtypes
        from vendor.appsllm_adapter import _embed_hidden

        ids = self._encode_prompt(text)
        if not ids:
            return backend_pb2.EmbeddingResult(embeddings=[])

        # Clamp to context window — truncate long inputs rather than blow up.
        ids = ids[: self.max_context]
        tokens = Tensor([ids])

        hidden = _embed_hidden(self.llm_model, tokens)  # (1, seqlen, dim)
        # Pool and normalize in float32. If the model emits half-precision
        # activations, squaring can overflow to inf and the 1e-12 epsilon
        # rounds to zero, which would defeat the divide-by-zero guard below.
        hidden = hidden.cast(dtypes.float32)
        # Mean pool over sequence dim
        pooled = hidden.mean(axis=1).squeeze(0)  # (dim,)
        # L2 normalize; the epsilon keeps an all-zero vector from dividing by 0.
        norm = pooled.square().sum().sqrt()
        vec = (pooled / (norm + 1e-12)).tolist()

        return backend_pb2.EmbeddingResult(embeddings=vec)
    except Exception as exc:
        # Top-level RPC boundary: report the failure to the client instead of
        # letting the exception escape the handler.
        import traceback
        traceback.print_exc()
        context.set_code(grpc.StatusCode.INTERNAL)
        context.set_details(f"Embedding failed: {exc}")
        return backend_pb2.EmbeddingResult()
648
649 async def GenerateImage(self, request, context):
650 if self.sd_model is None:

Callers

nothing calls this directly

Calls 3

_encode_prompt — Method · 0.95
set_code — Method · 0.45
set_details — Method · 0.45

Tested by

no test coverage detected