(
w: np.ndarray,
bits = 2,
g = 4,
)
| 526 | |
| 527 | # based on t_mac.utils.preprocess_weights |
def preprocess_weights_tl1(
    w: np.ndarray,
    bits = 2,
    g = 4,
) -> np.ndarray:
    """Ternarize a 2-D weight matrix, pack element pairs, and tile via process_tl1.

    Steps performed here:

    1. Entries with ``abs(w) < 1e-6`` are set to 0 and every remaining entry
       is replaced by its sign, so each element is -1, 0 or +1.
    2. Consecutive elements (row-major order) are grouped in pairs
       ``(first, second)`` and encoded as ``3 * first + second + 4``, which
       yields an index in ``0..8``.
    3. The indices are stored as a ``uint8`` array of shape ``(M, K // 2)``
       and passed to ``process_tl1`` together with the tile sizes below.

    Args:
        w: 2-D array of shape ``(M, K)``.
        bits: Not used by this function; kept for signature compatibility.
        g: Not used by this function; kept for signature compatibility.

    Returns:
        The value returned by ``process_tl1`` for the packed index matrix.
        NOTE(review): annotated as a single array because this function
        produces no second value; the previous ``Tuple[...]`` annotation
        looked like a leftover -- confirm against ``process_tl1``.

    Raises:
        NotImplementedError: if ``K`` matches neither ``hidden_size`` nor
            ``intermediate_size`` of ``model_config[args.model_size]``.
        ValueError: raised by NumPy when ``w`` is not 2-D or its elements
            cannot be grouped into ``(M, K // 2)`` pairs.
    """
    M, K = w.shape

    # Snap near-zero values to exact zero before taking the sign so that
    # tiny magnitudes do not become +/-1.
    weight = np.where(np.abs(w) < 1e-6, 0, w).astype(np.float32)
    weight = np.sign(weight)

    # Supported K values come from the module-level CLI args / model table.
    model_size = args.model_size
    KEMD = model_config[model_size]['hidden_size']
    KGQA = model_config[model_size]['intermediate_size']

    # Tile sizes for the K == hidden_size case.
    # outer loop
    BMEMD = 256
    BYEMD = 256
    # inner loop (32row 32num/16index)
    bmEMD = 32
    byEMD = 8

    # Tile sizes for the K == intermediate_size case.
    BMGQA = 256
    BYGQA = 256
    bmGQA = 32
    byGQA = 8

    # Pair up consecutive elements and map each (first, second) pair of
    # ternary values to a base-3 style code, shifted by +4 to be non-negative.
    pairs = np.reshape(weight, (weight.size // 2, 2))
    index = pairs[:, 0] * 3 + pairs[:, 1] + 4

    # row-major index
    weight = np.reshape(index, (M, K // 2)).astype(np.uint8)

    if K == KEMD:
        return process_tl1(weight, BMEMD, BYEMD, bmEMD, byEMD, M, K)
    if K == KGQA:
        return process_tl1(weight, BMGQA, BYGQA, bmGQA, byGQA, M, K)

    raise NotImplementedError(
        "unsupported K={} for model size {!r}: expected hidden_size={} "
        "or intermediate_size={}".format(K, model_size, KEMD, KGQA)
    )
| 575 | |
| 576 | |
| 577 | def preprocess_two_weights_tl2(M, K, weight_num, BM, BY, bm, by, weight, final_weight): |
no test coverage detected