()
| 50 | |
@pytest.fixture(scope="module")
def engine_from_checkpoint() -> tempfile.TemporaryDirectory:
    """Save a 2-way tensor-parallel LLaMA checkpoint, load it, and re-save it.

    One checkpoint shard per rank is written from ``llama_model_path`` into a
    scratch directory, an ``LLM`` is constructed from that directory, and the
    ``LLM`` is then saved into a second temporary directory.

    Returns:
        The ``tempfile.TemporaryDirectory`` that ``llm.save`` wrote into. The
        caller holds the object, so the directory stays on disk for as long as
        the module-scoped fixture value is alive.
    """
    tp_size = 2
    tokenizer = TransformersTokenizer.from_pretrained(llama_model_path)
    assert tokenizer is not None

    with tempfile.TemporaryDirectory() as checkpoint_dir:
        for rank in range(tp_size):
            # Only the first rank writes the shared config file.
            is_first_rank = rank == 0
            shard = LLaMAForCausalLM.from_hugging_face(
                llama_model_path,
                mapping=Mapping(world_size=tp_size, tp_size=tp_size, rank=rank),
            )
            shard.save_checkpoint(checkpoint_dir, save_config=is_first_rank)
            # Drop the reference before the next rank's model is loaded.
            del shard

        # Construct the LLM while the checkpoint directory still exists.
        llm = LLM(
            checkpoint_dir,
            tokenizer=tokenizer,
            kv_cache_config=global_kv_cache_config,
        )
        assert llm.args.parallel_config.tp_size == tp_size

    # Deliberately not a ``with`` block: the directory must outlive this call.
    engine_dir = tempfile.TemporaryDirectory()
    with llm:
        llm.save(engine_dir.name)

    return engine_dir
| 76 | |
| 77 | |
| 78 | @pytest.mark.gpu2 |
nothing calls this directly
no test coverage detected