(backend, dtype, channel_size, read_width)
| 114 | @pytest.mark.parametrize("channel_size", [64, 128]) |
| 115 | @pytest.mark.parametrize("read_width", [1, 2, 4, 8, 16]) |
| 116 | def test_texture_copy(backend, dtype, channel_size, read_width): |
| 117 | remote = get_rpc() |
| 118 | M, N, K = (256, 1024, 128) |
| 119 | lanes = channel_size // DataType(dtype).bits |
| 120 | if read_width > lanes: |
| 121 | return |
| 122 | |
| 123 | @I.ir_module(s_tir=True) |
| 124 | class TextureCopy: |
| 125 | @T.prim_func(s_tir=True) |
| 126 | def main(A: T.Buffer((M, N), dtype), B: T.Buffer((M, N), dtype)): |
| 127 | T.func_attr({"global_symbol": "main"}) |
| 128 | for li, lj in T.grid(M, N): |
| 129 | with T.sblock("Copy"): |
| 130 | i, j = T.axis.remap("SS", [li, lj]) |
| 131 | B[i, j] = A[i, j] |
| 132 | |
| 133 | def schedule_texture_read(sch: s_tir.Schedule): |
| 134 | B_blk = sch.get_sblock("Copy") |
| 135 | Ai_block = sch.cache_read(B_blk, 0, "global.texture") |
| 136 | sch.transform_layout(Ai_block, ("write", 0), lambda i, j: (i, j // lanes, j % lanes)) |
| 137 | |
| 138 | def schedule_default(blk, lanes): |
| 139 | i, j = sch.get_loops(blk) |
| 140 | jo, jv = sch.split(j, [None, lanes]) |
| 141 | |
| 142 | b = sch.fuse(i, jo) |
| 143 | bx, tx = sch.split(b, [None, 256]) |
| 144 | sch.bind(bx, "blockIdx.x") |
| 145 | sch.bind(tx, "threadIdx.x") |
| 146 | |
| 147 | sch.vectorize(jv) |
| 148 | |
| 149 | schedule_default(Ai_block, lanes) |
| 150 | schedule_default(B_blk, read_width) |
| 151 | |
| 152 | mod = TextureCopy |
| 153 | |
| 154 | if remote is None: |
| 155 | target = Target({"kind": backend, "device": "adreno"}) |
| 156 | else: |
| 157 | target = Target( |
| 158 | {"kind": backend, "device": "adreno"}, |
| 159 | {"kind": "llvm", "mtriple": "aarch64-linux-android"}, |
| 160 | ) |
| 161 | |
| 162 | with target: |
| 163 | mod = preprocess_pipeline(mod) |
| 164 | sch = tvm.s_tir.Schedule(mod) |
| 165 | schedule_texture_read(sch) |
| 166 | mod = postprocess_pipeline(sch.mod) |
| 167 | |
| 168 | ex = relax.build(mod, target) |
| 169 | load_path = "vm_library.so" |
| 170 | inputs = [np.random.randint(0, 128, (M, N)).astype(dtype), np.zeros((M, N), dtype)] |
| 171 | with tempfile.TemporaryDirectory() as temp_dir: |
| 172 | if remote is not None: |
| 173 | path = temp_dir + "/" + load_path |
nothing calls this directly
no test coverage detected
searching dependent graphs…