MCPcopy
hub / github.com/PaddlePaddle/ERNIE / __init__

Method __init__

tools/inference/infer.py:151–210  ·  view source on GitHub ↗

Initialize predictor with distributed setup and model loading. Args: args (Namespace): Configuration arguments; tokenizer (Optional): Pre-initialized tokenizer; model (Optional): Pre-initialized model; kwargs: Additional model initialization parameters.

(self, args, tokenizer=None, model=None, **kwargs)

Source from the content-addressed store, hash-verified

149 """
150
151 def __init__(self, args, tokenizer=None, model=None, **kwargs):
152 """
153 Initialize predictor with distributed setup and model loading.
154
155 Args:
156 args (Namespace): Configuration arguments
157 tokenizer (Optional): Pre-initialized tokenizer
158 model (Optional): Pre-initialized model
159 kwargs: Additional model initialization parameters
160 """
161 self.runtime_timer = RuntimeTimer("Predictor")
162 self.num_input_tokens = 0
163 self.num_output_tokens = 0
164 self.args = args
165
166 # init distributed env
167 self.tensor_parallel_degree = dist.get_world_size()
168 self.tensor_parallel_rank = dist.get_rank()
169 if dist.get_world_size() > 1:
170 strategy = fleet.DistributedStrategy()
171 strategy.hybrid_configs = {
172 "dp_degree": 1,
173 "mp_degree": self.tensor_parallel_degree,
174 "pp_degree": 1,
175 "sharding_degree": 1,
176 }
177 fleet.init(is_collective=True, strategy=strategy)
178 hcg = fleet.get_hybrid_communicate_group()
179 self.tensor_parallel_rank = hcg.get_model_parallel_rank()
180
181 # init model & tokenizer
182 self.tokenizer = Ernie4_5_Tokenizer.from_pretrained(args.model_name_or_path)
183 self.tokenizer.padding_side = "left"
184 paddle.set_default_dtype(self.args.dtype)
185 self.config = Ernie4_5_MoeConfig.from_pretrained(
186 args.model_name_or_path,
187 quantization_config=dict(
188 weight_quantize_algo=args.weight_quantize_algo,
189 ignore_modules=[".*out_linear.*"],
190 ),
191 dtype=self.args.dtype,
192 fused_mt=False,
193 tensor_parallel_output=False,
194 sequence_parallel=False,
195 use_sparse_head_and_loss_fn=False,
196 use_fused_head_and_loss_fn=False,
197 fuse_linear=False,
198 recompute=False,
199 tensor_parallel_degree=self.tensor_parallel_degree,
200 tensor_parallel_rank=self.tensor_parallel_rank,
201 use_flash_attention=True,
202 moe_group="dummy",
203 )
204 self.model = Ernie4_5_MoeForCausalLM.from_pretrained(
205 args.model_name_or_path,
206 config=self.config,
207 )
208 gc.collect()

Callers

nothing calls this directly

Calls 2

initMethod · 0.80
evalMethod · 0.45

Tested by

no test coverage detected