MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / engine_from_checkpoint

Function engine_from_checkpoint

tests/unittest/llmapi/test_llm_multi_gpu.py:52–75  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

50
@pytest.fixture(scope="module")
def engine_from_checkpoint() -> tempfile.TemporaryDirectory:
    """Module-scoped fixture that saves an ``LLM`` built from a TP=2 checkpoint.

    Steps:
      1. Load the tokenizer from ``llama_model_path``.
      2. For each of the two tensor-parallel ranks, load the LLaMA model from
         the Hugging Face path with that rank's ``Mapping`` and save its
         checkpoint into a scratch directory (the config is written by
         rank 0 only).
      3. Construct an ``LLM`` from the scratch checkpoint directory and
         check that it reports the expected ``tp_size``.
      4. Save the ``LLM`` into a fresh ``TemporaryDirectory``.

    Returns:
        The ``tempfile.TemporaryDirectory`` object that ``llm.save`` wrote
        into; consumers read its ``.name`` for the path.
    """
    tp_size = 2
    hf_tokenizer = TransformersTokenizer.from_pretrained(llama_model_path)
    assert hf_tokenizer is not None

    with tempfile.TemporaryDirectory() as checkpoint_dir:
        for tp_rank in range(tp_size):
            shard = LLaMAForCausalLM.from_hugging_face(
                llama_model_path,
                mapping=Mapping(world_size=tp_size,
                                tp_size=tp_size,
                                rank=tp_rank),
            )
            # Only the first rank writes the shared config file.
            write_config = (tp_rank == 0)
            shard.save_checkpoint(checkpoint_dir, save_config=write_config)
            # Drop the reference before loading the next rank's model.
            del shard

        # Constructed while the scratch checkpoint directory still exists.
        llm = LLM(
            checkpoint_dir,
            tokenizer=hf_tokenizer,
            kv_cache_config=global_kv_cache_config,
        )
        assert llm.args.parallel_config.tp_size == tp_size

    # NOTE(review): this directory is never cleaned up explicitly here; it
    # presumably lives until the returned object is finalized -- confirm that
    # relying on implicit cleanup is intended for a module-scoped fixture.
    engine_dir = tempfile.TemporaryDirectory()
    with llm:
        llm.save(engine_dir.name)

    return engine_dir
76
77
78@pytest.mark.gpu2

Callers

nothing calls this directly

Calls 6

MappingClass · 0.90
LLMClass · 0.50
from_pretrainedMethod · 0.45
from_hugging_faceMethod · 0.45
save_checkpointMethod · 0.45
saveMethod · 0.45

Tested by

no test coverage detected