A gRPC servicer that implements the Backend service defined in backend.proto.
| 34 | |
| 35 | # Implement the BackendServicer class with the service methods |
| 36 | class BackendServicer(backend_pb2_grpc.BackendServicer): |
| 37 | """ |
| 38 | A gRPC servicer that implements the Backend service defined in backend.proto. |
| 39 | """ |
| 40 | |
def Health(self, request, context):
    """
    Report that the backend is up.

    Args:
        request: The incoming health check request (not inspected).
        context: The gRPC context for this call (not used).

    Returns:
        backend_pb2.Reply: A reply whose message is the UTF-8 bytes of "OK".
    """
    # "OK" is pure ASCII, so this literal equals bytes("OK", 'utf-8').
    ok_payload = b"OK"
    return backend_pb2.Reply(message=ok_payload)
| 53 | |
| 54 | async def LoadModel(self, request, context): |
| 55 | """ |
| 56 | Loads a language model using MLX. |
| 57 | |
| 58 | Args: |
| 59 | request: The load model request. |
| 60 | context: The gRPC context. |
| 61 | |
| 62 | Returns: |
| 63 | backend_pb2.Result: The load model result. |
| 64 | """ |
| 65 | try: |
| 66 | print(f"Loading MLX model: {request.Model}", file=sys.stderr) |
| 67 | print(f"Request: {request}", file=sys.stderr) |
| 68 | |
| 69 | # Parse Options[] key:value strings into a typed dict (shared helper) |
| 70 | self.options = parse_options(request.Options) |
| 71 | print(f"Options: {self.options}", file=sys.stderr) |
| 72 | |
| 73 | # Build tokenizer config for MLX using options |
| 74 | tokenizer_config = {} |
| 75 | |
| 76 | # Handle trust_remote_code from request or options |
| 77 | if request.TrustRemoteCode or self.options.get("trust_remote_code", False): |
| 78 | tokenizer_config["trust_remote_code"] = True |
| 79 | |
| 80 | # Handle EOS token from options |
| 81 | if "eos_token" in self.options: |
| 82 | tokenizer_config["eos_token"] = self.options["eos_token"] |
| 83 | |
| 84 | # Handle other tokenizer config options |
| 85 | for key in ["pad_token", "bos_token", "unk_token", "sep_token", "cls_token", "mask_token"]: |
| 86 | if key in self.options: |
| 87 | tokenizer_config[key] = self.options[key] |
| 88 | |
| 89 | # Load model and tokenizer using MLX |
| 90 | if tokenizer_config: |
| 91 | print(f"Loading with tokenizer_config: {tokenizer_config}", file=sys.stderr) |
| 92 | self.model, self.tokenizer = load(request.Model, tokenizer_config=tokenizer_config) |
| 93 | else: |