A gRPC servicer for the backend service. This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
| 52 | |
| 53 | # Implement the BackendServicer class with the service methods |
| 54 | class BackendServicer(backend_pb2_grpc.BackendServicer): |
| 55 | """ |
| 56 | A gRPC servicer for the backend service. |
| 57 | |
| 58 | This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding. |
| 59 | """ |
| 60 | def Health(self, request, context): |
| 61 | return backend_pb2.Reply(message=bytes("OK", 'utf-8')) |
| 62 | |
| 63 | def LoadModel(self, request, context): |
| 64 | model_name = request.Model |
| 65 | |
| 66 | # Check to see if the Model exists in the filesystem already. |
| 67 | if os.path.exists(request.ModelFile): |
| 68 | model_name = request.ModelFile |
| 69 | |
| 70 | compute = torch.float16 |
| 71 | if request.F16Memory == True: |
| 72 | compute=torch.bfloat16 |
| 73 | |
| 74 | self.CUDA = torch.cuda.is_available() |
| 75 | self.OV=False |
| 76 | self.GenericTTS=False |
| 77 | self.SentenceTransformer = False |
| 78 | self.processor = None |
| 79 | |
| 80 | device_map="cpu" |
| 81 | mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available() |
| 82 | if mps_available: |
| 83 | device_map = "mps" |
| 84 | quantization = None |
| 85 | autoTokenizer = True |
| 86 | |
| 87 | # Parse options from request.Options |
| 88 | self.options = {} |
| 89 | options = request.Options |
| 90 | |
| 91 | # The options are a list of strings of the form optname:optvalue. |
| 92 | # We store all the options in a dict so we can use them later when generating. |
| 93 | # Example options: ["max_new_tokens:3072", "guidance_scale:3.0", "temperature:1.8", "top_p:0.90", "top_k:45"] |
| 94 | for opt in options: |
| 95 | if ":" not in opt: |
| 96 | continue |
| 97 | key, value = opt.split(":", 1) |
| 98 | # if value is a number, convert it to the appropriate type |
| 99 | try: |
| 100 | if "." in value: |
| 101 | value = float(value) |
| 102 | else: |
| 103 | value = int(value) |
| 104 | except ValueError: |
| 105 | # Keep as string if conversion fails |
| 106 | pass |
| 107 | self.options[key] = value |
| 108 | |
| 109 | print(f"Parsed options: {self.options}", file=sys.stderr) |
| 110 | |
| 111 | if self.CUDA: |