MCPcopy
hub / github.com/mudler/LocalAI / BackendServicer

Class BackendServicer

backend/python/vllm/backend.py:65–893  ·  view source on GitHub ↗

A gRPC servicer that implements the Backend service defined in backend.proto.

Source from the content-addressed store, hash-verified

63
64# Implement the BackendServicer class with the service methods
65class BackendServicer(backend_pb2_grpc.BackendServicer):
66 """
67 A gRPC servicer that implements the Backend service defined in backend.proto.
68 """
def generate(self, prompt, max_new_tokens):
    """
    Generates text based on the given prompt and maximum number of new tokens.

    Ends any beam search in progress on ``self.generator``, encodes the
    prompt with the generator's tokenizer, and then samples one token at a
    time until ``max_new_tokens`` tokens were produced or the tokenizer's
    EOS token id is returned.

    Args:
        prompt (str): The prompt to generate text from.
        max_new_tokens (int): The maximum number of new tokens to generate.

    Returns:
        str: The generated text. Empty when no token was generated
        (``max_new_tokens <= 0``).
    """
    generator = self.generator
    generator.end_beam_search()
    tokenizer = generator.tokenizer

    # Tokenizing the input
    ids = tokenizer.encode(prompt)

    generator.gen_begin_reuse(ids)
    # Everything in sequence[0] past this offset is newly generated.
    initial_len = generator.sequence[0].shape[0]

    has_leading_space = False
    generated_any = False
    for i in range(max_new_tokens):
        token = generator.gen_single_token()
        generated_any = True
        # A first piece starting with '▁' marks a leading space; it is
        # re-added explicitly below, after decoding.
        if i == 0 and tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
            has_leading_space = True

        if token.item() == tokenizer.eos_token_id:
            break

    if not generated_any:
        return ''

    # Decode once, after generation: only the final text is returned, so
    # re-decoding the whole suffix on every step was redundant work.
    decoded_text = tokenizer.decode(generator.sequence[0][initial_len:])
    if has_leading_space:
        decoded_text = ' ' + decoded_text
    return decoded_text
101
102 def _parse_options(self, options_list):
103 """Parse Options[] key:value string list into a dict."""
104 opts = {}
105 for opt in options_list:
106 if ":" not in opt:
107 continue
108 key, value = opt.split(":", 1)
109 opts[key.strip()] = value.strip()
110 return opts
111
112 def _apply_engine_args(self, engine_args, engine_args_json):
113 """Apply user-supplied engine_args (JSON object) onto an AsyncEngineArgs.
114
115 Returns a new AsyncEngineArgs with the typed fields preserved and the
116 user's overrides layered on top. Uses ``dataclasses.replace`` so vLLM's
117 ``__post_init__`` re-runs and auto-converts dict-valued fields like
118 ``compilation_config`` / ``attention_config`` into their dataclass form.
119 ``speculative_config`` and ``kv_transfer_config`` are accepted as dicts
120 directly (vLLM converts them at engine init).
121
122 Unknown keys raise ValueError with the closest valid field as a hint.

Calls

no outgoing calls