MCPcopy
hub / github.com/lm-sys/FastChat / generate_stream

Method generate_stream

fastchat/serve/sglang_worker.py:81–162  ·  view source on GitHub ↗
(self, params)

Source from the content-addressed store, hash-verified

79 self.init_heart_beat()
80
81 async def generate_stream(self, params):
82 self.call_ct += 1
83
84 prompt = params.pop("prompt")
85 images = params.get("images", [])
86 temperature = float(params.get("temperature", 1.0))
87 top_p = float(params.get("top_p", 1.0))
88 top_k = params.get("top_k", -1.0)
89 frequency_penalty = float(params.get("frequency_penalty", 0.0))
90 presence_penalty = float(params.get("presence_penalty", 0.0))
91 max_new_tokens = params.get("max_new_tokens", 256)
92 stop_str = params.get("stop", None)
93 stop_token_ids = params.get("stop_token_ids", None) or []
94 echo = params.get("echo", True)
95
96 # Handle stop_str
97 stop = []
98 if isinstance(stop_str, str) and stop_str != "":
99 stop.append(stop_str)
100 elif isinstance(stop_str, list) and stop_str != []:
101 stop.extend(stop_str)
102
103 for tid in stop_token_ids:
104 if tid is not None:
105 s = self.tokenizer.decode(tid)
106 if s != "":
107 stop.append(s)
108
109 # make sampling params for sgl.gen
110 top_p = max(top_p, 1e-5)
111 if temperature <= 1e-5:
112 top_p = 1.0
113
114 # split prompt by image token
115 split_prompt = prompt.split(IMAGE_PLACEHOLDER_STR)
116 if prompt.count(IMAGE_PLACEHOLDER_STR) != len(images):
117 raise ValueError(
118 "The number of images passed in does not match the number of <image> tokens in the prompt!"
119 )
120 prompt = []
121 for i in range(len(split_prompt)):
122 prompt.append(split_prompt[i])
123 if i < len(images):
124 prompt[-1] = prompt[-1].strip()
125 prompt.append(load_image(images[i]))
126
127 state = pipeline.run(
128 prompt,
129 max_new_tokens,
130 stop=stop,
131 temperature=temperature,
132 top_p=top_p,
133 top_k=top_k,
134 frequency_penalty=frequency_penalty,
135 presence_penalty=presence_penalty,
136 stream=True,
137 )
138

Callers 1

generate_stream_gate · Method · 0.95

Calls 2

load_image · Function · 0.90
is_partial_stop · Function · 0.90

Tested by

no test coverage detected