| 6 | |
@classmethod
def setUpClass(cls):
  """Create the shared fixtures: mock tokenizer, mock model, live server and client.

  The tokenizer and model are ``Mock`` objects with canned return values.
  An ``LLMServer`` is bound to 127.0.0.1 with port 0, the bound port is read
  back from ``server_address``, and the server is run from a daemon thread.
  Finally an OpenAI client is pointed at the server's ``/v1`` base URL.
  """
  end_ids = (999,)

  def _stream_piece(tid=None):
    # Callable handed back by ``stream_decoder()``: "Hello" for any token id,
    # empty string when called without one.
    if tid is not None:
      return "Hello"
    return ""

  def _is_end(tid):
    return tid in end_ids

  def _fake_generate(ids, **kwargs):
    # A fresh iterator on every call so each request sees the full sequence.
    return iter([300, 301, 999])

  # Mock tokenizer: every attribute is installed via constructor keywords.
  cls.mock_tok = Mock(
    role=Mock(return_value=[100, 101]),
    encode=Mock(return_value=[200, 201, 202]),
    decode=Mock(return_value="Hello"),
    stream_decoder=Mock(return_value=_stream_piece),
    end_turn=Mock(return_value=[998]),
    prefix=Mock(return_value=[1]),
    preset="llama3",
    bos_id=1,
    eos_id=999,
    eot_id=None,
    is_end=Mock(side_effect=_is_end),
  )

  # Mock model: yields two tokens followed by 999, which ``is_end`` accepts.
  cls.mock_model = Mock(
    generate=Mock(side_effect=_fake_generate),
    get_start_pos=Mock(return_value=0),
  )

  from tinygrad.llm.cli import LLMServer

  server = LLMServer(('127.0.0.1', 0), cls.mock_model, "test-model", cls.mock_tok)
  cls.server = server
  cls.port = server.server_address[1]

  worker = threading.Thread(target=server.serve_forever, daemon=True)
  cls.server_thread = worker
  worker.start()
  # NOTE(review): presumably gives the serving thread time to start before
  # the first request; confirm whether this pause is actually required.
  time.sleep(0.1)

  from openai import OpenAI
  cls.client = OpenAI(base_url=f"http://127.0.0.1:{cls.port}/v1", api_key="test")
| 36 | |
| 37 | @classmethod |
| 38 | def tearDownClass(cls): |