(o1:UOp, o2:UOp, A:UOp, B:UOp)
| 316 | Tensor.realize(a, b) |
| 317 | |
def custom_add_with_tmp(o1:UOp, o2:UOp, A:UOp, B:UOp) -> UOp:
    """Build a kernel graph that stores ``A+B`` into ``o1`` and ``A+B+2`` into ``o2``.

    All four operands are flattened first, and a single range of length
    ``o1.numel()`` (taken after flattening) indexes every one of them.

    Returns:
        The simplified sink of the two grouped stores, carrying a
        ``KernelInfo`` named ``add_with_tmp_<numel>``.
    """
    out1, out2, lhs, rhs = (u.flatten() for u in (o1, o2, A, B))
    count = out1.numel()
    idx = UOp.range(count, 0)

    # Both stores are built from the same element-wise sum of the inputs.
    summed = lhs[idx] + rhs[idx]
    first_store = out1[idx].store(summed)
    second_store = out2[idx].store(summed + 2)

    grouped = UOp.group(first_store, second_store).end(idx)
    info = KernelInfo(name=f"add_with_tmp_{count}")
    return grouped.sink(arg=info).simplify()
| 324 | |
| 325 | from tinygrad import function |
| 326 | @function(precompile=True) |