(
self,
model_path: str,
engine_config: PytorchEngineConfig = None,
trust_remote_code: bool = False,
speculative_config: SpeculativeConfig = None,
)
| 95 | """ |
| 96 | |
| 97 | def __init__( |
| 98 | self, |
| 99 | model_path: str, |
| 100 | engine_config: PytorchEngineConfig = None, |
| 101 | trust_remote_code: bool = False, |
| 102 | speculative_config: SpeculativeConfig = None, |
| 103 | ) -> None: |
| 104 | # make sure engine config exists |
| 105 | engine_config = ConfigBuilder.update_engine_config(engine_config) |
| 106 | |
| 107 | # frequent gc would cause latency spikes |
| 108 | # default threshold (700, 10, 10) |
| 109 | # WARNING: I don't know if it is a good idea to put gc setting here. |
| 110 | gc.set_threshold(10000, 100, 100) |
| 111 | |
| 112 | # dist args |
| 113 | self.tp = engine_config.tp |
| 114 | self.dp = engine_config.dp |
| 115 | self.dp_rank = engine_config.dp_rank |
| 116 | |
| 117 | # download models and adapters |
| 118 | if not os.path.exists(model_path): |
| 119 | model_path = get_model(model_path, engine_config.download_dir, engine_config.revision) |
| 120 | |
| 121 | adapters = engine_config.adapters |
| 122 | if adapters is not None and len(adapters) > 0: |
| 123 | adapters = self._download_adapters(adapters, engine_config) |
| 124 | |
| 125 | # check environment |
| 126 | checker = EngineChecker(model_path=model_path, |
| 127 | engine_config=engine_config, |
| 128 | trust_remote_code=trust_remote_code, |
| 129 | logger=logger) |
| 130 | checker.handle() |
| 131 | |
| 132 | # build configs |
| 133 | scheduler_config = ConfigBuilder.build_scheduler_config(engine_config) |
| 134 | cache_config = ConfigBuilder.build_cache_config(engine_config) |
| 135 | backend_config = ConfigBuilder.build_backend_config(engine_config) |
| 136 | dist_config = ConfigBuilder.build_dist_config(engine_config) |
| 137 | misc_config = ConfigBuilder.build_misc_config(engine_config) |
| 138 | # spec decode |
| 139 | self.specdecode_config = ConfigBuilder.build_specdecode_config(model_path, |
| 140 | speculative_config, |
| 141 | engine_config, |
| 142 | cache_config, |
| 143 | dist_config, |
| 144 | trust_remote_code=trust_remote_code, |
| 145 | ) |
| 146 | |
| 147 | # build model agent |
| 148 | self.executor = build_executor( |
| 149 | model_path, |
| 150 | cache_config=cache_config, |
| 151 | backend_config=backend_config, |
| 152 | dist_config=dist_config, |
| 153 | misc_config=misc_config, |
| 154 | adapters=adapters, |
nothing calls this directly
no test coverage detected