(cls)
@classmethod
@gpu_device_initializer(log_prefix="TestQuantedModel")
def setUpClass(cls):
    """Build the quantized model shared by the tests and store it on ``cls.model``.

    Steps, in order:
      1. Create a ``QuantizationConfig`` from a plain dict that maps the
         ``weight_only_int8`` algorithm to the ``.*mlp.*`` and
         ``.*self_attn.*`` patterns and lists ``.*out_linear.*`` under
         ``ignore_modules``.
      2. Load the model config for ``PaddleFormers/tiny-random-qwen3`` with
         ``dtype="bfloat16"`` and the quantization config attached.
      3. Load the causal-LM model with that config, using the
         ``flex_checkpoint`` checkpoint format.

    NOTE(review): how the patterns are matched against module names is
    decided inside ``QuantizationConfig`` / the model loader, not here.
    """
    # Same identifier is used for both the config and the model load.
    checkpoint_name = "PaddleFormers/tiny-random-qwen3"

    quant_settings = {
        "weight_quantize_algo": {"weight_only_int8": [".*mlp.*", ".*self_attn.*"]},
        "ignore_modules": [".*out_linear.*"],
    }
    quant_config = QuantizationConfig.from_dict(quant_settings)

    config = AutoConfig.from_pretrained(
        checkpoint_name,
        dtype="bfloat16",
        quantization_config=quant_config,
    )

    cls.model = AutoModelForCausalLM.from_pretrained(
        checkpoint_name,
        config=config,
        load_checkpoint_format="flex_checkpoint",
    )
| 44 | |
| 45 | def test_quant_model(self): |
| 46 | input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338] |
nothing calls this directly
no test coverage detected