MCPcopy
hub / github.com/Tele-AI/Telechat / from_quantized

Method from_quantized

quant/modeling_telechat_gptq.py:102–348  ·  view source on GitHub ↗

Load a quantized model from local disk.

(
            cls,
            model_name_or_path: Optional[str] = None,
            save_dir: Optional[str] = None,
            device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None,
            max_memory: Optional[dict] = None,
            device: Optional[Union[str, int]] = None,
            low_cpu_mem_usage: bool = False,
            use_triton: bool = False,
            torch_dtype: torch.dtype = torch.float16,
            inject_fused_attention: bool = True,
            inject_fused_mlp: bool = True,
            use_cuda_fp16: bool = True,
            quantize_config: Optional[BaseQuantizeConfig] = None,
            model_basename: Optional[str] = None,
            use_safetensors: bool = False,
            trust_remote_code: bool = False,
            warmup_triton: bool = False,
            trainable: bool = False,
            **kwargs
    )

Source from the content-addressed store, hash-verified

100
101 @classmethod
102 def from_quantized(
103 cls,
104 model_name_or_path: Optional[str] = None,
105 save_dir: Optional[str] = None,
106 device_map: Optional[Union[str, Dict[str, Union[int, str]]]] = None,
107 max_memory: Optional[dict] = None,
108 device: Optional[Union[str, int]] = None,
109 low_cpu_mem_usage: bool = False,
110 use_triton: bool = False,
111 torch_dtype: torch.dtype = torch.float16,
112 inject_fused_attention: bool = True,
113 inject_fused_mlp: bool = True,
114 use_cuda_fp16: bool = True,
115 quantize_config: Optional[BaseQuantizeConfig] = None,
116 model_basename: Optional[str] = None,
117 use_safetensors: bool = False,
118 trust_remote_code: bool = False,
119 warmup_triton: bool = False,
120 trainable: bool = False,
121 **kwargs
122 ):
123 """load quantized model from local disk"""
124
125 # Parameters related to loading from Hugging Face Hub
126 cache_dir = kwargs.pop("cache_dir", None)
127 force_download = kwargs.pop("force_download", False)
128 resume_download = kwargs.pop("resume_download", False)
129 proxies = kwargs.pop("proxies", None)
130 local_files_only = kwargs.pop("local_files_only", False)
131 use_auth_token = kwargs.pop("use_auth_token", None)
132 revision = kwargs.pop("revision", None)
133 subfolder = kwargs.pop("subfolder", "")
134 commit_hash = kwargs.pop("_commit_hash", None)
135
136 if use_triton and not TRITON_AVAILABLE:
137 logger.warning("triton is not installed, reset use_triton to False")
138 use_triton = False
139
140 # == step1: prepare configs and file names == #
141 if model_name_or_path and save_dir:
142 logger.warning("save_dir will be ignored because model_name_or_path is explicit specified.")
143 if not model_name_or_path and save_dir:
144 model_name_or_path = save_dir
145 logger.warning("save_dir is deprecated and will be removed in version 0.3.0", PendingDeprecationWarning,
146 stacklevel=2)
147 if not model_name_or_path and not save_dir:
148 raise ValueError("at least one of model_name_or_path or save_dir should be specified.")
149
150 config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code)
151
152 if quantize_config is None:
153 quantize_config = BaseQuantizeConfig.from_pretrained(model_name_or_path, **kwargs)
154
155 if model_basename is None:
156 if quantize_config.model_file_base_name:
157 model_basename = quantize_config.model_file_base_name
158 else:
159 model_basename = f"gptq_model-{quantize_config.bits}bit-{quantize_config.group_size}g"

Callers 1

main (Function) · 0.80

Calls 4

from_pretrained (Method) · 0.80
eval (Method) · 0.80
pop (Method) · 0.45
append (Method) · 0.45

Tested by

no test coverage detected