(
conv,
model_name,
worker_addr,
prompt,
temperature,
repetition_penalty,
top_p,
max_new_tokens,
images,
)
| 299 | |
| 300 | |
def model_worker_stream_iter(
    conv,
    model_name,
    worker_addr,
    prompt,
    temperature,
    repetition_penalty,
    top_p,
    max_new_tokens,
    images,
):
    """Stream generation results from a model worker.

    Builds the generation payload, POSTs it to the worker's
    ``/worker_generate_stream`` endpoint, and yields each chunk of the
    streamed reply after JSON-decoding it.

    Args:
        conv: Conversation object; its ``stop_str`` and ``stop_token_ids``
            attributes are forwarded as the ``stop`` / ``stop_token_ids``
            generation parameters.
        model_name: Sent as the ``model`` field of the payload.
        worker_addr: Base address of the worker; the endpoint path is
            appended to it.
        prompt: Sent as the ``prompt`` field of the payload.
        temperature: Sent unchanged in the payload.
        repetition_penalty: Sent unchanged in the payload.
        top_p: Sent unchanged in the payload.
        max_new_tokens: Sent unchanged in the payload.
        images: Collection of images; added to the payload under ``images``
            only when non-empty. ``None`` is treated like an empty
            collection.

    Yields:
        The object parsed from each non-empty, NUL-delimited chunk of the
        response body.
    """
    # Make requests
    gen_params = {
        "model": model_name,
        "prompt": prompt,
        "temperature": temperature,
        "repetition_penalty": repetition_penalty,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
        "stop": conv.stop_str,
        "stop_token_ids": conv.stop_token_ids,
        "echo": False,
    }

    # The request is logged before images are attached, so the log entry
    # never contains the image payload.
    logger.info(f"==== request ====\n{gen_params}")

    if images:
        gen_params["images"] = images

    # Stream output. The context manager guarantees the streamed connection
    # is released even if the consumer stops iterating early or a chunk fails
    # to decode; with stream=True the connection otherwise stays open until
    # the body is fully consumed.
    with requests.post(
        worker_addr + "/worker_generate_stream",
        headers=headers,
        json=gen_params,
        stream=True,
        timeout=WORKER_API_TIMEOUT,
    ) as response:
        # The worker separates messages with NUL bytes; empty segments
        # (e.g. after the trailing delimiter) are skipped.
        for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
            if chunk:
                yield json.loads(chunk.decode())
| 342 | |
| 343 | |
| 344 | def is_limit_reached(model_name, ip): |
no outgoing calls
no test coverage detected
searching dependent graphs…