(self, params)
| 79 | self.init_heart_beat() |
| 80 | |
| 81 | async def generate_stream(self, params): |
| 82 | self.call_ct += 1 |
| 83 | |
| 84 | prompt = params.pop("prompt") |
| 85 | images = params.get("images", []) |
| 86 | temperature = float(params.get("temperature", 1.0)) |
| 87 | top_p = float(params.get("top_p", 1.0)) |
| 88 | top_k = params.get("top_k", -1.0) |
| 89 | frequency_penalty = float(params.get("frequency_penalty", 0.0)) |
| 90 | presence_penalty = float(params.get("presence_penalty", 0.0)) |
| 91 | max_new_tokens = params.get("max_new_tokens", 256) |
| 92 | stop_str = params.get("stop", None) |
| 93 | stop_token_ids = params.get("stop_token_ids", None) or [] |
| 94 | echo = params.get("echo", True) |
| 95 | |
| 96 | # Handle stop_str |
| 97 | stop = [] |
| 98 | if isinstance(stop_str, str) and stop_str != "": |
| 99 | stop.append(stop_str) |
| 100 | elif isinstance(stop_str, list) and stop_str != []: |
| 101 | stop.extend(stop_str) |
| 102 | |
| 103 | for tid in stop_token_ids: |
| 104 | if tid is not None: |
| 105 | s = self.tokenizer.decode(tid) |
| 106 | if s != "": |
| 107 | stop.append(s) |
| 108 | |
| 109 | # make sampling params for sgl.gen |
| 110 | top_p = max(top_p, 1e-5) |
| 111 | if temperature <= 1e-5: |
| 112 | top_p = 1.0 |
| 113 | |
| 114 | # split prompt by image token |
| 115 | split_prompt = prompt.split(IMAGE_PLACEHOLDER_STR) |
| 116 | if prompt.count(IMAGE_PLACEHOLDER_STR) != len(images): |
| 117 | raise ValueError( |
| 118 | "The number of images passed in does not match the number of <image> tokens in the prompt!" |
| 119 | ) |
| 120 | prompt = [] |
| 121 | for i in range(len(split_prompt)): |
| 122 | prompt.append(split_prompt[i]) |
| 123 | if i < len(images): |
| 124 | prompt[-1] = prompt[-1].strip() |
| 125 | prompt.append(load_image(images[i])) |
| 126 | |
| 127 | state = pipeline.run( |
| 128 | prompt, |
| 129 | max_new_tokens, |
| 130 | stop=stop, |
| 131 | temperature=temperature, |
| 132 | top_p=top_p, |
| 133 | top_k=top_k, |
| 134 | frequency_penalty=frequency_penalty, |
| 135 | presence_penalty=presence_penalty, |
| 136 | stream=True, |
| 137 | ) |
| 138 |
no test coverage detected