MCPcopy Index your code
hub / github.com/NVIDIA/TensorRT-LLM / __call__

Method __call__

tensorrt_llm/llmapi/llm_utils.py:643–706  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

641 return model_obj.model_dir
642
643 def __call__(self) -> Tuple[Path, Union[Path, None]]:
644
645 if self.llm_args.model_format is _ModelFormatKind.TLLM_ENGINE:
646 return Path(self.llm_args.model), None
647
648 # Download speculative model from HuggingFace if needed (all backends)
649 if (self.llm_args.speculative_config is not None and
650 self.llm_args.speculative_config.speculative_model is not None):
651 spec_model_obj = _ModelWrapper(
652 self.llm_args.speculative_config.speculative_model)
653 spec_model_dir = self._download_hf_model_if_needed(spec_model_obj)
654 self.llm_args.speculative_config.speculative_model = spec_model_dir
655
656 # AutoDeploy doesn't use ModelLoader
657 if self.llm_args.backend == "_autodeploy":
658 return None, ""
659
660 self.engine_cache_stage: Optional[CachedStage] = None
661 self._hf_model_dir = None
662 self.model_loader = ModelLoader(self.llm_args)
663
664 if self.llm_args.backend is not None:
665 if self.llm_args.backend not in ["pytorch", "_autodeploy"]:
666 raise ValueError(
667 f'backend {self.llm_args.backend} is not supported.')
668
669 self._hf_model_dir = self._download_hf_model_if_needed(
670 self.model_loader.model_obj, revision=self.llm_args.revision)
671
672 if self.llm_args.quant_config.quant_algo is not None:
673 logger.warning(
674 "QuantConfig for pytorch backend is ignored. You can load"
675 "quantized model with hf_quant_config.json directly.")
676 # Currently, this is to make updated quant_config visible by llm.args.quant_config
677 # TODO: Unify the logics with those in tensorrt_llm/_torch/model_config.py
678 self.model_loader._update_from_hf_quant_config()
679
680 return None, self._hf_model_dir
681
682 if self.model_loader.model_obj.is_hub_model:
683 # This will download the config.json from HF model hub, this helps to create a PretrainedConfig for
684 # cache key.
685 self._hf_model_dir = download_hf_pretrained_config(
686 self.model_loader.model_obj.model_name,
687 revision=self.llm_args.revision)
688
689 elif self.model_loader.model_obj.is_local_model:
690 self._hf_model_dir = self.model_loader.model_obj.model_dir if self.llm_args.model_format is _ModelFormatKind.HF else None
691
692 if self.build_cache_enabled:
693 print_colored("Build cache is enabled.\n", 'yellow')
694
695 self.engine_cache_stage = self._get_engine_cache_stage()
696 if self.engine_cache_stage.is_cached():
697 self.llm_build_stats.cache_hitted = True
698 print_colored(
699 f"Reusing cached engine in {self.engine_cache_stage.get_engine_path()}\n\n",
700 'grey')

Callers

nothing calls this directly

Calls 11

_build_model (Method) · 0.95
_ModelWrapper (Class) · 0.85
print_colored (Function) · 0.85
is_cached (Method) · 0.80
get_engine_path (Method) · 0.80
ModelLoader (Class) · 0.70
warning (Method) · 0.45

Tested by

no test coverage detected