(self, query: str)
| 89 | return input_str, real_length |
| 90 | |
def run_infer(self, query: str):
    """Generate a completion for ``query`` and time the generation call.

    Args:
        query: Prompt text, passed to ``self.llm.generate`` as a
            single-element batch together with ``self.sampling_params``.

    Returns:
        A ``(time_cost, real_out_length, generated_text)`` tuple:
        ``time_cost`` is the elapsed time of the ``generate`` call in
        seconds (float), ``real_out_length`` is the number of tokens
        ``self.tokenizer.tokenize`` yields for the generated text, and
        ``generated_text`` is the text of the first returned candidate.
    """
    # perf_counter() is monotonic and high-resolution; time.time() follows
    # the wall clock, which can be adjusted mid-measurement and produce
    # skewed or even negative durations.
    started = time.perf_counter()
    # A one-element batch goes in, so only the first result is relevant.
    output = self.llm.generate([query], self.sampling_params)[0]
    time_cost = time.perf_counter() - started

    generated_text = output.outputs[0].text
    # NOTE(review): the length comes from re-tokenizing the decoded text,
    # which may not equal the number of tokens the engine actually
    # sampled -- confirm this is the intended metric.
    real_out_length = len(self.tokenizer.tokenize(generated_text))

    return time_cost, real_out_length, generated_text
| 100 | |
| 101 | def run(self): |
| 102 |