()
| 40 | |
| 41 | |
def test_CachedModelLoader():
    """Run CachedModelLoader with the build cache on and validate its output.

    The loader is invoked with ``enable_build_cache=True``; the first element
    of its result must be an existing directory that ``get_model_format``
    classifies as ``_ModelFormatKind.TLLM_ENGINE``.
    """
    # CachedModelLoader enables engine caching and multi-gpu building
    kv_cache = KvCacheConfig(free_gpu_memory_fraction=0.4)
    llm_args = TrtLlmArgs(
        model=llama_model_path,
        kv_cache_config=kv_cache,
        enable_build_cache=True,
    )
    build_stats = LlmBuildStats()
    loader = CachedModelLoader(llm_args, llm_build_stats=build_stats)

    # Only the engine directory matters here; the second result is ignored.
    engine_dir, _ = loader()

    assert engine_dir
    assert engine_dir.exists()
    assert engine_dir.is_dir()

    detected_format = get_model_format(engine_dir, trust_remote_code=True)
    assert detected_format is _ModelFormatKind.TLLM_ENGINE
| 55 | |
| 56 | |
| 57 | def test_LlmArgs_default_gpus_per_node(): |
nothing calls this directly
no test coverage detected