(
self,
tools: list[dict],
history: list[Conversation],
**parameters,
)
| 24 | ).eval() |
| 25 | |
def generate_stream(
    self,
    tools: list[dict],
    history: list[Conversation],
    **parameters,
) -> Generator[tuple[str | dict, list[dict]], None, None]:
    """Stream a model reply, yielding the parsed cumulative output per chunk.

    The conversation is converted with ``process_input``, rendered through
    the tokenizer's chat template, and handed to ``self.model.generate`` on
    a background thread. Every text chunk delivered by the streamer is
    appended to the running reply, and the whole reply so far is passed
    through ``process_response`` before being yielded.

    Args:
        tools: Tool descriptions forwarded to ``process_input``.
        history: Conversation turns forwarded to ``process_input``.
        **parameters: Extra keyword arguments for ``self.model.generate``.
            They are applied last, so they override the defaults set here
            (including ``eos_token_id`` and ``do_sample``).

    Yields:
        The result of ``process_response(total_text, chat_history)`` for
        the text accumulated so far, once per streamed chunk.
    """
    chat_history = process_input(history, tools)
    model_inputs = self.tokenizer.apply_chat_template(
        chat_history,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(self.model.device)

    # skip_prompt=True keeps the prompt out of the streamed text.
    # NOTE(review): timeout=5 presumably bounds the wait for each chunk in
    # seconds -- confirm against the TextIteratorStreamer documentation.
    streamer = TextIteratorStreamer(
        tokenizer=self.tokenizer,
        timeout=5,
        skip_prompt=True,
    )

    generate_kwargs = {
        **model_inputs,
        "streamer": streamer,
        # NOTE(review): presumably the model's stop-token ids -- confirm
        # against the tokenizer's vocabulary.
        "eos_token_id": [151329, 151336, 151338],
        "do_sample": True,
    }
    # Caller-supplied parameters win over the defaults above.
    generate_kwargs.update(parameters)

    # generate() blocks until the reply is complete, so it runs on a worker
    # thread while this generator drains the streamer.
    worker = Thread(target=self.model.generate, kwargs=generate_kwargs)
    worker.start()

    total_text = ""
    for token_text in streamer:
        total_text += token_text
        yield process_response(total_text, chat_history)

    # Reached only when the stream ended normally: reap the worker so the
    # thread does not linger. Deliberately not in a ``finally`` so that a
    # consumer abandoning the generator early is not blocked on generation.
    worker.join()
nothing calls this directly
no test coverage detected