(x: R.Tensor((16, 16), dtype="float32"))
| 34 | class Module: |
# Relax entry function `main` (global symbol "main"), declared with pure=False.
# Takes a (16, 16) float32 tensor `x` and returns a (16, 16) float32 tensor.
# The body runs in three stages: fetch a pair of objects through the
# `get_cached_alloc` builtin, hand a tensor plus both objects to the
# `run_or_capture` builtin, then do one more `add` into freshly allocated storage.
| 35 | @R.function(pure=False) |
| 36 | def main(x: R.Tensor((16, 16), dtype="float32")) -> R.Tensor((16, 16), dtype="float32"): |
| 37 | cls = Module |
| 38 | R.func_attr({"global_symbol": "main"}) |
# Stage 1: call the `vm.builtin.cuda_graph.get_cached_alloc` builtin with
# `cls.cuda_graph_alloc` and the constant 0. The result is annotated as a
# 2-tuple of opaque objects.
# NOTE(review): `cuda_graph_alloc` is not visible in this chunk; presumably it
# produces the storages that get cached — confirm against its definition.
| 39 | gv: R.Tuple(R.Object, R.Object) = R.call_builtin_with_ctx("vm.builtin.cuda_graph.get_cached_alloc", (cls.cuda_graph_alloc, R.prim_value(0)), sinfo_args=(R.Tuple(R.Object, R.Object),)) |
# The first tuple element is passed as the storage argument of the
# (16, 16) float32 tensor `alloc`.
| 40 | storage: R.Object = gv[0] |
| 41 | alloc = R.vm.alloc_tensor(storage, R.prim_value(0), R.shape((16, 16)), R.dtype("float32")) |
# The result of `add` is bound to `_` and never read.
# NOTE(review): presumably `add` writes its output into `alloc`; its body is
# outside this chunk — confirm.
| 42 | _: R.Tuple = cls.add(x, alloc) |
# Stage 2: bundle `alloc` with both objects from `gv` (second element first,
# then the first) and pass the bundle to `vm.builtin.cuda_graph.run_or_capture`
# together with `cls.cuda_graph_capture` and the constant 0. The result is
# annotated as a 1-tuple holding a (16, 16) float32 tensor.
| 43 | storage1: R.Object = gv[1] |
| 44 | gv1: R.Tuple(R.Tensor(dtype="float32"), R.Object, R.Object) = (alloc, storage1, storage) |
| 45 | gv2: R.Tuple(R.Tensor((16, 16), dtype="float32")) = R.call_builtin_with_ctx("vm.builtin.cuda_graph.run_or_capture", (cls.cuda_graph_capture, gv1, R.prim_value(0)), sinfo_args=(R.Tuple(R.Tensor((16, 16), dtype="float32")),)) |
# Stage 3: unlike `storage`/`storage1`, this storage comes from a direct
# `R.vm.alloc_storage` call instead of the tuple fetched in stage 1.
# NOTE(review): 1024 equals 16 * 16 * 4, i.e. the byte size of a (16, 16)
# float32 tensor, assuming the shape argument is a size in bytes — confirm.
| 46 | storage2: R.Object = R.vm.alloc_storage(R.shape((1024,)), R.prim_value(0), R.dtype("uint8")) |
| 47 | alloc3 = R.vm.alloc_tensor(storage2, R.prim_value(0), R.shape((16, 16)), R.dtype("float32")) |
# Second `add` call: its inputs are the tensor unpacked from the
# `run_or_capture` result and `alloc3`; again the result tuple is unused.
| 48 | lv4: R.Tensor((16, 16), dtype="float32") = gv2[0] |
| 49 | _3: R.Tuple = cls.add(lv4, alloc3) |
# The returned value is `alloc3`, rebound with a shape-less float32 annotation.
| 50 | lv5: R.Tensor(dtype="float32") = alloc3 |
| 51 | return lv5 |
| 52 | |
| 53 | @T.prim_func(s_tir=True) |
| 54 | def add(A: T.Buffer((16, 16), "float32"), B: T.Buffer((16, 16), "float32")): |
no test coverage detected