A gRPC servicer that implements the Backend service defined in backend.proto.
| 63 | |
| 64 | # Implement the BackendServicer class with the service methods |
| 65 | class BackendServicer(backend_pb2_grpc.BackendServicer): |
| 66 | """ |
| 67 | A gRPC servicer that implements the Backend service defined in backend.proto. |
| 68 | """ |
def generate(self, prompt, max_new_tokens):
    """
    Generates text based on the given prompt and maximum number of new tokens.

    Generation stops after ``max_new_tokens`` tokens or as soon as the
    tokenizer's ``eos_token_id`` is produced, whichever comes first.

    Args:
        prompt (str): The prompt to generate text from.
        max_new_tokens (int): The maximum number of new tokens to generate.

    Returns:
        str: The generated text (the prompt itself is not included). An
        empty string is returned when no token is generated.
    """
    generator = self.generator
    tokenizer = generator.tokenizer

    generator.end_beam_search()

    # Tokenizing the input
    ids = tokenizer.encode(prompt)

    generator.gen_begin_reuse(ids)
    # Length of the sequence before any new token is appended; everything
    # past this index is newly generated output.
    initial_len = generator.sequence[0].shape[0]

    has_leading_space = False
    produced_token = False
    for i in range(max_new_tokens):
        token = generator.gen_single_token()
        produced_token = True

        # '▁' is presumably the SentencePiece word-boundary marker: when the
        # first generated piece starts with it, decoding the tail on its own
        # would drop that leading space, so remember to restore it.
        if i == 0 and tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
            has_leading_space = True

        if token.item() == tokenizer.eos_token_id:
            break

    if not produced_token:
        return ''

    # Decode once after generation finishes: only the final decoded text is
    # returned, so re-decoding the growing tail on every step was wasted work.
    decoded_text = tokenizer.decode(generator.sequence[0][initial_len:])
    if has_leading_space:
        decoded_text = ' ' + decoded_text
    return decoded_text
| 101 | |
| 102 | def _parse_options(self, options_list): |
| 103 | """Parse Options[] key:value string list into a dict.""" |
| 104 | opts = {} |
| 105 | for opt in options_list: |
| 106 | if ":" not in opt: |
| 107 | continue |
| 108 | key, value = opt.split(":", 1) |
| 109 | opts[key.strip()] = value.strip() |
| 110 | return opts |
| 111 | |
| 112 | def _apply_engine_args(self, engine_args, engine_args_json): |
| 113 | """Apply user-supplied engine_args (JSON object) onto an AsyncEngineArgs. |
| 114 | |
| 115 | Returns a new AsyncEngineArgs with the typed fields preserved and the |
| 116 | user's overrides layered on top. Uses ``dataclasses.replace`` so vLLM's |
| 117 | ``__post_init__`` re-runs and auto-converts dict-valued fields like |
| 118 | ``compilation_config`` / ``attention_config`` into their dataclass form. |
| 119 | ``speculative_config`` and ``kv_transfer_config`` are accepted as dicts |
| 120 | directly (vLLM converts them at engine init). |
| 121 | |
| 122 | Unknown keys raise ValueError with the closest valid field as a hint. |
no outgoing calls