hub / github.com/Tele-AI/Telechat / from_pretrained

Method from_pretrained

quant/modeling_telechat_gptq.py:31–99 · view source on GitHub ↗

Load the un-quantized pretrained model to CPU.

(
            cls,
            pretrained_model_name_or_path: str,
            quantize_config: BaseQuantizeConfig,
            max_memory: Optional[dict] = None,
            trust_remote_code: bool = False,
            torch_dtype: torch.dtype = torch.float16,
            **model_init_kwargs
    )

Source from the content-addressed store, hash-verified

29
30	@classmethod
31	def from_pretrained(
32	cls,
33	pretrained_model_name_or_path: str,
34	quantize_config: BaseQuantizeConfig,
35	max_memory: Optional[dict] = None,
36	trust_remote_code: bool = False,
37	torch_dtype: torch.dtype = torch.float16,
38	**model_init_kwargs
39	):
40	"""load un-quantized pretrained model to cpu"""
41
42	if not torch.cuda.is_available():
43	raise EnvironmentError("Load pretrained model to do quantization requires CUDA available.")
44
45	def skip(args, *kwargs):
46	pass
47
48	torch.nn.init.kaiming_uniform_ = skip
49	torch.nn.init.uniform_ = skip
50	torch.nn.init.normal_ = skip
51
52	config = AutoConfig.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
53
54	# enforce some values despite user specified
55	model_init_kwargs["torch_dtype"] = torch_dtype
56	model_init_kwargs["trust_remote_code"] = trust_remote_code
57	if max_memory:
58	if "disk" in max_memory:
59	raise NotImplementedError("disk offload not support yet.")
60	with accelerate.init_empty_weights():
61	model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
62	model.tie_weights()
63
64	max_memory = accelerate.utils.get_balanced_memory(
65	model,
66	max_memory=max_memory,
67	no_split_module_classes=[cls.layer_type],
68	dtype=model_init_kwargs["torch_dtype"],
69	low_zero=False
70	)
71	model_init_kwargs["device_map"] = accelerate.infer_auto_device_map(
72	model,
73	max_memory=max_memory,
74	no_split_module_classes=[cls.layer_type],
75	dtype=model_init_kwargs["torch_dtype"]
76	)
77	model_init_kwargs["low_cpu_mem_usage"] = True
78
79	del model
80	else:
81	model_init_kwargs["device_map"] = None
82	model_init_kwargs["low_cpu_mem_usage"] = False
83
84	torch.cuda.empty_cache()
85
86	model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, **model_init_kwargs)
87	model_config = model.config.to_dict()
88	seq_len_keys = ["max_position_embeddings", "seq_length", "n_positions"]

Callers 10

quant.py (File) · 0.80

from_quantized (Method) · 0.80

main (Function) · 0.80

load_telechat_tokenizer (Function) · 0.80

create_hf_telechat (Function) · 0.80

main (Function) · 0.80

telechat_service.py (File) · 0.80

score_MMLU.py (File) · 0.80

score_CEVAL.py (File) · 0.80

Calls 1

eval (Method) · 0.80

Tested by

no test coverage detected