The CachedModelLoader is used to build the model in either single-GPU or multi-GPU mode, with caching optionally enabled.
| 586 | |
| 587 | |
| 588 | class CachedModelLoader: |
| 589 | ''' |
| 590 | The CachedModelLoader is used to build the model in either single-GPU or multi-GPU mode, with caching optionally enabled. |
| 591 | ''' |
| 592 | |
| 593 | def __init__( |
| 594 | self, |
| 595 | llm_args: LlmArgs, |
| 596 | llm_build_stats: weakref.ReferenceType["LlmBuildStats"], |
| 597 | mpi_session: Optional[MpiSession] = None, |
| 598 | workspace: Optional[str] = None, |
| 599 | ): |
| 600 | self.llm_args = llm_args |
| 601 | self.mpi_session = mpi_session |
| 602 | self._workspace = workspace or tempfile.TemporaryDirectory() |
| 603 | self.llm_build_stats = llm_build_stats |
| 604 | |
| 605 | # This is used for the build cache. Computing the cache key requires a local HF model, which may have been |
| 606 | # downloaded from the HF model hub, so this attribute holds its local path. |
| 607 | self._hf_model_dir: Optional[Path] = None |
| 608 | |
| 609 | @property |
| 610 | def workspace(self) -> Path: |
| 611 | return Path(self._workspace.name) if isinstance( |
| 612 | self._workspace, tempfile.TemporaryDirectory) else Path( |
| 613 | self._workspace) |
| 614 | |
| 615 | def _submit_to_all_workers( |
| 616 | self, |
| 617 | task: Callable[..., Any], |
| 618 | *args, |
| 619 | **kwargs, |
| 620 | ) -> List[Any]: |
| 621 | if self.llm_args.parallel_config.is_multi_gpu: |
| 622 | return self.mpi_session.submit_sync(task, *args, **kwargs) |
| 623 | else: |
| 624 | return [task(*args, **kwargs)] |
| 625 | |
| 626 | def _download_hf_model_if_needed(self, |
| 627 | model_obj: _ModelWrapper, |
| 628 | revision: Optional[str] = None) -> Path: |
| 629 | """Download a model from HF hub if needed. |
| 630 | |
| 631 | Also updates the model_obj.model_dir with the local model dir on rank 0. |
| 632 | """ |
| 633 | if model_obj.is_hub_model: |
| 634 | model_dirs = self._submit_to_all_workers( |
| 635 | CachedModelLoader._node_download_hf_model, |
| 636 | model=model_obj.model_name, |
| 637 | revision=revision) |
| 638 | model_dir = model_dirs[0] |
| 639 | model_obj.model_dir = model_dir |
| 640 | return model_dir |
| 641 | return model_obj.model_dir |
| 642 | |
| 643 | def __call__(self) -> Tuple[Path, Union[Path, None]]: |
| 644 | |
| 645 | if self.llm_args.model_format is _ModelFormatKind.TLLM_ENGINE: |
no outgoing calls