(blocks, block_size, type_size, dtype=None)
| 69 | return (d * x) |
| 70 | |
def dequantize_blocks_Q5_1(blocks, block_size, type_size, dtype=None):
    """Dequantize Q5_1 blocks into ``scale * q + offset`` values.

    Each quantized value ``q`` is assembled from a low 4-bit nibble and one
    extra high bit (bit 4), then scaled and shifted per block.

    Args:
        blocks: Tensor whose first dimension is the number of blocks.
            NOTE(review): presumably raw uint8 block bytes, one block per
            row -- confirm against ``split_block_dims``.
        block_size: Number of quantized values per block; half of it is
            used as the trailing dimension when unpacking the nibbles.
        type_size: Not used by this function.
        dtype: Target dtype the float16 scale and offset are converted to.

    Returns:
        The result of ``(scale * q) + offset``, with ``q`` flattened to
        ``(n_blocks, -1)`` before broadcasting against scale and offset.
    """
    num_blocks = blocks.shape[0]

    # NOTE(review): split_block_dims presumably slices off fields of 2, 2
    # and 4 bytes and returns the remainder last -- verify in the helper.
    scale_raw, offset_raw, high_raw, packed = split_block_dims(blocks, 2, 2, 4)

    # Reinterpret the stored scale/offset fields as float16, then convert.
    scale = scale_raw.view(torch.float16).to(dtype)
    offset = offset_raw.view(torch.float16).to(dtype)
    high_word = to_uint32(high_raw)

    device = scale.device
    bit_shifts = torch.arange(32, device=device, dtype=torch.int32).reshape(1, 32)
    nibble_shifts = torch.tensor([0, 4], device=device, dtype=torch.uint8).reshape(1, 1, 2, 1)

    # Spread the high-bit word into 32 single-bit columns per block.
    high_bits = high_word.reshape((num_blocks, 1)) >> bit_shifts
    high_bits = (high_bits & 1).to(torch.uint8)

    # Shift every packed byte by 0 and by 4, then mask to get both nibbles.
    low_nibbles = packed.reshape((num_blocks, -1, 1, block_size // 2)) >> nibble_shifts
    low_nibbles = (low_nibbles & 0x0F).reshape((num_blocks, -1))

    # Put the extra bit above the nibble to form the 5-bit quantized value.
    quants = low_nibbles | (high_bits << 4)
    return (scale * quants) + offset
| 86 | |
| 87 | def dequantize_blocks_Q5_0(blocks, block_size, type_size, dtype=None): |
| 88 | n_blocks = blocks.shape[0] |
nothing calls this directly
no test coverage detected