hub / github.com/NVIDIA/TensorRT-LLM / CachedModelLoader

Class CachedModelLoader

tensorrt_llm/llmapi/llm_utils.py:588–879 · view source on GitHub ↗

The CachedModelLoader builds the model in either single-GPU or multi-GPU mode, optionally using the build cache.

Source from the content-addressed store, hash-verified

586
587
588	class CachedModelLoader:
589	'''
590	The CacheModelLoader is used to build the model in both single or multi-gpu, with cache might be enabled.
591	'''
592
def __init__(
    self,
    llm_args: LlmArgs,
    llm_build_stats: weakref.ReferenceType["LlmBuildStats"],
    mpi_session: Optional[MpiSession] = None,
    workspace: Optional[str] = None,
):
    """Store the loader configuration and pick a workspace.

    Args:
        llm_args: Arguments object kept on the instance as ``llm_args``.
        llm_build_stats: Weak reference to the build stats, stored as-is.
        mpi_session: Optional MPI session, stored as-is.
        workspace: Workspace location. When it is falsy (``None`` or an
            empty string), a ``tempfile.TemporaryDirectory`` is created and
            kept on the instance instead.
    """
    self.llm_args = llm_args
    self.llm_build_stats = llm_build_stats
    self.mpi_session = mpi_session

    # Keep the caller's workspace when one is given; otherwise hold on to a
    # TemporaryDirectory object so its lifetime follows this loader.
    if workspace:
        self._workspace = workspace
    else:
        self._workspace = tempfile.TemporaryDirectory()

    # Used by the build cache: computing the cache key requires a local HF
    # model, which may have been downloaded from the HF model hub, so this
    # attribute holds that local path once it is known.
    self._hf_model_dir: Optional[Path] = None
608
@property
def workspace(self) -> Path:
    """Return the workspace location as a ``Path``.

    If the stored workspace is a ``tempfile.TemporaryDirectory``, its ``name``
    is used; any other stored value is passed to ``Path`` directly.
    """
    holder = self._workspace
    if isinstance(holder, tempfile.TemporaryDirectory):
        return Path(holder.name)
    return Path(holder)
614
def _submit_to_all_workers(
    self,
    task: Callable[..., Any],
    *args,
    **kwargs,
) -> List[Any]:
    """Run ``task`` with the given arguments and return the results as a list.

    When ``self.llm_args.parallel_config.is_multi_gpu`` is true, the call is
    delegated to ``self.mpi_session.submit_sync``; otherwise ``task`` is
    invoked directly in the current process.

    Args:
        task: Callable to execute.
        *args: Positional arguments forwarded to ``task`` unchanged.
        **kwargs: Keyword arguments forwarded to ``task`` unchanged.

    Returns:
        In the multi-GPU case, whatever ``submit_sync`` returns (presumably
        one result per worker -- confirm against ``MpiSession``). Otherwise a
        single-element list holding the local result.
    """
    # Unpack with * and ** so the task receives the caller's positional and
    # keyword arguments as such, rather than a tuple plus the keyword names.
    if self.llm_args.parallel_config.is_multi_gpu:
        return self.mpi_session.submit_sync(task, *args, **kwargs)
    return [task(*args, **kwargs)]
625
def _download_hf_model_if_needed(self,
                                 model_obj: _ModelWrapper,
                                 revision: Optional[str] = None) -> Path:
    """Return a local directory for the model, downloading it if it is a hub model.

    For a hub model (``model_obj.is_hub_model``), the download task
    ``CachedModelLoader._node_download_hf_model`` is dispatched through
    ``_submit_to_all_workers`` with the model name and ``revision``. The first
    entry of the returned list is written back to ``model_obj.model_dir`` and
    returned. For any other model, ``model_obj.model_dir`` is returned
    unchanged and nothing is downloaded.

    Args:
        model_obj: Model wrapper; read for ``is_hub_model``, ``model_name``
            and ``model_dir``, and updated in place for hub models.
        revision: Optional revision passed through to the download task.

    Returns:
        The local model directory.
    """
    if not model_obj.is_hub_model:
        return model_obj.model_dir

    per_worker_dirs = self._submit_to_all_workers(
        CachedModelLoader._node_download_hf_model,
        model=model_obj.model_name,
        revision=revision,
    )
    # Only the first result is kept (the original docstring describes it as
    # the local model dir on rank 0).
    local_dir = per_worker_dirs[0]
    model_obj.model_dir = local_dir
    return local_dir
642
643	def __call__(self) -> Tuple[Path, Union[Path, None]]:
644
645	if self.llm_args.model_format is _ModelFormatKind.TLLM_ENGINE:

Callers 2

test_CachedModelLoaderFunction · 0.85

_build_modelMethod · 0.85

Calls

no outgoing calls

Tested by 1

test_CachedModelLoaderFunction · 0.68