hub / github.com/Tele-AI/Telechat / from_quantized

Method from_quantized

quant/modeling_telechat_gptq.py:102–348 · view source on GitHub ↗

Load a quantized model from local disk.

(
            cls,
            model_name_or_path: Optional[str] = None,
            save_dir: Optional[str] = None,
            device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None,
            max_memory: Optional[dict] = None,
            device: Optional[Union[str, int]] = None,
            low_cpu_mem_usage: bool = False,
            use_triton: bool = False,
            torch_dtype: torch.dtype = torch.float16,
            inject_fused_attention: bool = True,
            inject_fused_mlp: bool = True,
            use_cuda_fp16: bool = True,
            quantize_config: Optional[BaseQuantizeConfig] = None,
            model_basename: Optional[str] = None,
            use_safetensors: bool = False,
            trust_remote_code: bool = False,
            warmup_triton: bool = False,
            trainable: bool = False,
            **kwargs
    )

Source from the content-addressed store, hash-verified

100
101	@classmethod
102	def from_quantized(
103	cls,
104	model_name_or_path: Optional[str] = None,
105	save_dir: Optional[str] = None,
106	device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None,
107	max_memory: Optional[dict] = None,
108	device: Optional[Union[str, int]] = None,
109	low_cpu_mem_usage: bool = False,
110	use_triton: bool = False,
111	torch_dtype: torch.dtype = torch.float16,
112	inject_fused_attention: bool = True,
113	inject_fused_mlp: bool = True,
114	use_cuda_fp16: bool = True,
115	quantize_config: Optional[BaseQuantizeConfig] = None,
116	model_basename: Optional[str] = None,
117	use_safetensors: bool = False,
118	trust_remote_code: bool = False,
119	warmup_triton: bool = False,
120	trainable: bool = False,
121	**kwargs
122	):
123	"""load quantized model from local disk"""
124
125	# Parameters related to loading from Hugging Face Hub
126	cache_dir = kwargs.pop("cache_dir", None)
127	force_download = kwargs.pop("force_download", False)
128	resume_download = kwargs.pop("resume_download", False)
129	proxies = kwargs.pop("proxies", None)
130	local_files_only = kwargs.pop("local_files_only", False)
131	use_auth_token = kwargs.pop("use_auth_token", None)
132	revision = kwargs.pop("revision", None)
133	subfolder = kwargs.pop("subfolder", "")
134	commit_hash = kwargs.pop("_commit_hash", None)
135
136	if use_triton and not TRITON_AVAILABLE:
137	logger.warning("triton is not installed, reset use_triton to False")
138	use_triton = False
139
140	# == step1: prepare configs and file names == #
141	if model_name_or_path and save_dir:
142	logger.warning("save_dir will be ignored because model_name_or_path is explicit specified.")
143	if not model_name_or_path and save_dir:
144	model_name_or_path = save_dir
145	logger.warning("save_dir is deprecated and will be removed in version 0.3.0", PendingDeprecationWarning,
146	stacklevel=2)
147	if not model_name_or_path and not save_dir:
148	raise ValueError("at least one of model_name_or_path or save_dir should be specified.")
149
150	config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)
151
152	if quantize_config is None:
153	quantize_config = BaseQuantizeConfig.from_pretrained(model_name_or_path, **kwargs)
154
155	if model_basename is None:
156	if quantize_config.model_file_base_name:
157	model_basename = quantize_config.model_file_base_name
158	else:
159	model_basename = f"gptq_model-{quantize_config.bits}bit-{quantize_config.group_size}g"

Callers 1

main · Function · 0.80

Calls 4

from_pretrained · Method · 0.80

eval · Method · 0.80

pop · Method · 0.45

append · Method · 0.45

Tested by

no test coverage detected