(blocks, block_size, type_size, dtype=None)
# 16-entry signed lookup table; dequantize_blocks_IQ4_NL gathers from it
# using the 4-bit codes unpacked from each block.
KVALUES = torch.tensor(
    [
        -127, -104, -83, -65, -49, -35, -22, -10,
        1, 13, 25, 38, 53, 69, 89, 113,
    ],
    dtype=torch.int8,
)
| 242 | |
def dequantize_blocks_IQ4_NL(blocks, block_size, type_size, dtype=None):
    """Dequantize IQ4_NL blocks via a per-block scale and the KVALUES table.

    Each block is split by ``split_block_dims(blocks, 2)`` into a leading
    2-byte part, reinterpreted as a float16 scale, and the remaining bytes,
    which hold two 4-bit table indices each.

    Args:
        blocks: Raw block tensor, one block per row along dim 0.
            NOTE(review): presumably a uint8 tensor of shape
            (n_blocks, type_size) -- confirm against the caller.
        block_size: Number of quantized values per block; half of it is
            used as the packed-byte group width when unpacking nibbles.
        type_size: Unused in this function.
        dtype: Target dtype the float16 scale is converted to.

    Returns:
        Tensor with ``blocks.shape[0]`` rows holding ``scale * KVALUES[code]``
        for every 4-bit code in the block.
    """
    num_blocks = blocks.shape[0]

    raw_scale, packed = split_block_dims(blocks, 2)
    scales = raw_scale.view(torch.float16).to(dtype)

    # Broadcasting against shifts of 0 and 4 along dim 2 yields, for every
    # group of block_size // 2 bytes, all low nibbles followed by all high
    # nibbles.
    grouped = packed.reshape((num_blocks, -1, 1, block_size // 2))
    shifts = torch.tensor([0, 4], device=scales.device, dtype=torch.uint8)
    nibbles = grouped >> shifts.reshape((1, 1, 2, 1))

    # torch.gather needs int64 indices with a trailing axis to index into.
    codes = (nibbles & 0x0F).reshape((num_blocks, -1, 1)).to(torch.int64)

    # Expand the 16-entry table so every code position has its own row.
    table = KVALUES.to(codes.device).expand(*codes.shape[:-1], 16)
    values = torch.gather(table, dim=-1, index=codes).reshape((num_blocks, -1))
    del table  # should still be a view, but drop the reference to be safe

    return scales * values
| 257 | |
| 258 | def dequantize_blocks_IQ4_XS(blocks, block_size, type_size, dtype=None): |
| 259 | n_blocks = blocks.shape[0] |
nothing calls this directly
no test coverage detected