(model, quant_config: QuantConfig)
| 230 | |
| 231 | |
def fp8_quantize(model, quant_config: QuantConfig):
    """Hand *model* to ``quantize_layers`` together with the FP8 layer map.

    Args:
        model: The model object forwarded unchanged to ``quantize_layers``.
        quant_config: Quantization settings; its ``quant_mode`` must report
            ``has_fp8_qdq()`` as true.

    Returns:
        Whatever ``quantize_layers`` returns for the given arguments.

    Raises:
        AssertionError: If ``quant_config.quant_mode.has_fp8_qdq()`` is falsy.
    """
    # Precondition: this entry point is only meant for FP8 QDQ quant modes.
    assert quant_config.quant_mode.has_fp8_qdq()

    # Source layer class -> class passed to quantize_layers as its counterpart.
    # NOTE(review): MixtureOfExperts maps to itself; presumably quantize_layers
    # rebuilds it with the quant settings applied -- confirm against that helper.
    fp8_layer_map = {
        ColumnLinear: FP8Linear,
        RowLinear: FP8RowLinear,
        MixtureOfExperts: MixtureOfExperts,
    }

    return quantize_layers(model, quant_config, fp8_layer_map)
| 247 | |
| 248 | |
| 249 | def fp8_rowwise_quantize(model, quant_config: QuantConfig): |
no test coverage detected