MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / CachedModelLoader

Class CachedModelLoader

tensorrt_llm/llmapi/llm_utils.py:588–879  ·  view source on GitHub ↗

The CachedModelLoader builds the model in both single-GPU and multi-GPU setups, optionally with the build cache enabled.

Source from the content-addressed store, hash-verified

586
587
588class CachedModelLoader:
589 '''
590 The CacheModelLoader is used to build the model in both single or multi-gpu, with cache might be enabled.
591 '''
592
593 def __init__(
594 self,
595 llm_args: LlmArgs,
596 llm_build_stats: weakref.ReferenceType["LlmBuildStats"],
597 mpi_session: Optional[MpiSession] = None,
598 workspace: Optional[str] = None,
599 ):
600 self.llm_args = llm_args
601 self.mpi_session = mpi_session
602 self._workspace = workspace or tempfile.TemporaryDirectory()
603 self.llm_build_stats = llm_build_stats
604
605 # This is used for build cache. To compute the cache key, a local HF model is required, it could be download
606 # from HF model hub, so this helps to hold the path.
607 self._hf_model_dir: Optional[Path] = None
608
609 @property
610 def workspace(self) -> Path:
611 return Path(self._workspace.name) if isinstance(
612 self._workspace, tempfile.TemporaryDirectory) else Path(
613 self._workspace)
614
615 def _submit_to_all_workers(
616 self,
617 task: Callable[..., Any],
618 *args,
619 **kwargs,
620 ) -> List[Any]:
621 if self.llm_args.parallel_config.is_multi_gpu:
622 return self.mpi_session.submit_sync(task, *args, **kwargs)
623 else:
624 return [task(*args, **kwargs)]
625
626 def _download_hf_model_if_needed(self,
627 model_obj: _ModelWrapper,
628 revision: Optional[str] = None) -> Path:
629 """Download a model from HF hub if needed.
630
631 Also updates the model_obj.model_dir with the local model dir on rank 0.
632 """
633 if model_obj.is_hub_model:
634 model_dirs = self._submit_to_all_workers(
635 CachedModelLoader._node_download_hf_model,
636 model=model_obj.model_name,
637 revision=revision)
638 model_dir = model_dirs[0]
639 model_obj.model_dir = model_dir
640 return model_dir
641 return model_obj.model_dir
642
643 def __call__(self) -> Tuple[Path, Union[Path, None]]:
644
645 if self.llm_args.model_format is _ModelFormatKind.TLLM_ENGINE:

Callers 2

test_CachedModelLoaderFunction · 0.85
_build_modelMethod · 0.85

Calls

no outgoing calls

Tested by 1

test_CachedModelLoaderFunction · 0.68