Initialize predictor with distributed setup and model loading. Args: args (Namespace): Configuration arguments; tokenizer (Optional): Pre-initialized tokenizer; model (Optional): Pre-initialized model; kwargs: Additional model initialization parameters.
(self, args, tokenizer=None, model=None, **kwargs)
| 149 | """ |
| 150 | |
def __init__(self, args, tokenizer=None, model=None, **kwargs):
    """
    Initialize predictor with distributed setup and model loading.

    Args:
        args (Namespace): Configuration arguments. The code below reads
            ``model_name_or_path``, ``dtype`` and ``weight_quantize_algo``.
        tokenizer (Optional): Pre-initialized tokenizer. When ``None``, one
            is loaded from ``args.model_name_or_path``.
        model (Optional): Pre-initialized model. When ``None``, one is
            loaded from ``args.model_name_or_path`` using ``self.config``.
        kwargs: Additional model initialization parameters.
            NOTE(review): not consumed by the code shown here; confirm
            whether they should be forwarded to the model loader.
    """
    self.runtime_timer = RuntimeTimer("Predictor")
    self.num_input_tokens = 0
    self.num_output_tokens = 0
    self.args = args

    # init distributed env
    # Every launched process is treated as one tensor-parallel shard:
    # the world size is used directly as the model-parallel degree.
    self.tensor_parallel_degree = dist.get_world_size()
    self.tensor_parallel_rank = dist.get_rank()
    if self.tensor_parallel_degree > 1:
        strategy = fleet.DistributedStrategy()
        strategy.hybrid_configs = {
            "dp_degree": 1,
            "mp_degree": self.tensor_parallel_degree,
            "pp_degree": 1,
            "sharding_degree": 1,
        }
        fleet.init(is_collective=True, strategy=strategy)
        hcg = fleet.get_hybrid_communicate_group()
        # After fleet init, take the rank from the model-parallel group
        # instead of the global rank recorded above.
        self.tensor_parallel_rank = hcg.get_model_parallel_rank()

    # init model & tokenizer
    # Honour an injected tokenizer; only hit the checkpoint when none is given.
    if tokenizer is None:
        tokenizer = Ernie4_5_Tokenizer.from_pretrained(args.model_name_or_path)
    self.tokenizer = tokenizer
    # Applied to an injected tokenizer as well, so both paths end up with
    # the same padding side.
    self.tokenizer.padding_side = "left"
    paddle.set_default_dtype(self.args.dtype)
    # The config is always built so that ``self.config`` stays available.
    # NOTE(review): with an injected model this assumes the checkpoint
    # config matches that model; verify against callers.
    self.config = Ernie4_5_MoeConfig.from_pretrained(
        args.model_name_or_path,
        quantization_config=dict(
            weight_quantize_algo=args.weight_quantize_algo,
            ignore_modules=[".*out_linear.*"],
        ),
        dtype=self.args.dtype,
        fused_mt=False,
        tensor_parallel_output=False,
        sequence_parallel=False,
        use_sparse_head_and_loss_fn=False,
        use_fused_head_and_loss_fn=False,
        fuse_linear=False,
        recompute=False,
        tensor_parallel_degree=self.tensor_parallel_degree,
        tensor_parallel_rank=self.tensor_parallel_rank,
        use_flash_attention=True,
        moe_group="dummy",
    )
    # Honour an injected model; load the weights only when none is given.
    if model is None:
        model = Ernie4_5_MoeForCausalLM.from_pretrained(
            args.model_name_or_path,
            config=self.config,
        )
    self.model = model
    # Run a garbage-collection pass once loading is finished.
    gc.collect()