(self)
| 18 | class TestHCQ(unittest.TestCase): |
@classmethod
def setUpClass(cls):
  """Create the class-wide fixtures shared by the tests in TestHCQ.

  Every attribute is stored on `TestHCQ` itself rather than on `cls`, so the
  state lands on the same class no matter which class the hook is invoked on:

  - `d0`: the default device, `Device[Device.DEFAULT]`.
  - `a`, `b`: a realized two-element tensor and the result of `a + 1`.
  - `prg`, `runtime`: the program built from `linear.src[-1].src[0]` of `b`'s
    linear schedule, and the runtime obtained for it on `d0`.
  - `addr`, `addr2`: two packed pairs of buffer handles, `(b, a)` and `(a, b)`.
  - `kernargs_off`, `kernargs_size`: offset and allocation size reported by the runtime.
  - `compute_queue`, `copy_queue`: backend queue classes. They are only assigned
    when `Device.DEFAULT` is "AMD" or "NV"; on any other backend they stay unset.

  Two kernarg blocks are written starting at `d0.kernargs_ptr`: the first holds
  `addr`, the second (`kernargs_size` bytes further) holds `addr2`.
  """
  TestHCQ.d0 = Device[Device.DEFAULT]
  #TestHCQ.d1: AMDDevice = Device["AMD:1"]
  TestHCQ.a = Tensor([0.,1.], device=Device.DEFAULT).realize()
  TestHCQ.b = TestHCQ.a + 1
  linear = TestHCQ.b.schedule_linear()
  TestHCQ.prg = to_program(linear.src[-1].src[0], TestHCQ.d0.renderer)
  TestHCQ.runtime = get_runtime(TestHCQ.d0.device, TestHCQ.prg)
  # the output buffer must exist before its handle can be packed below
  TestHCQ.b.uop.buffer.allocate()

  # wow that's a lot of abstraction layers
  # addr packs (b, a); addr2 packs the same two handles in the opposite order
  TestHCQ.addr = struct.pack("QQ", TestHCQ.b.uop.buffer._buf, TestHCQ.a.uop.buffer._buf)
  TestHCQ.addr2 = struct.pack("QQ", TestHCQ.a.uop.buffer._buf, TestHCQ.b.uop.buffer._buf)
  TestHCQ.kernargs_off = TestHCQ.runtime.kernargs_offset
  TestHCQ.kernargs_size = TestHCQ.runtime.kernargs_alloc_size

  # first kernarg block gets addr, the block kernargs_size bytes later gets addr2
  ctypes.memmove(TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_off, TestHCQ.addr, len(TestHCQ.addr))
  ctypes.memmove(TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size+TestHCQ.kernargs_off, TestHCQ.addr2, len(TestHCQ.addr2))

  # backend modules are imported lazily so only the active backend's runtime is loaded
  if Device.DEFAULT == "AMD":
    from tinygrad.runtime.ops_amd import HWQueue, HWPM4Queue
    TestHCQ.compute_queue = HWPM4Queue
    TestHCQ.copy_queue = HWQueue
  elif Device.DEFAULT == "NV":
    from tinygrad.runtime.ops_nv import HWQueue
    # nv need to copy constbuffer there as well
    # the same constbuffer_0 contents go at the start of both kernarg blocks (0x160-byte window each)
    to_mv(TestHCQ.d0.kernargs_ptr, 0x160).cast('I')[:] = array.array('I', TestHCQ.runtime.constbuffer_0)
    to_mv(TestHCQ.d0.kernargs_ptr+TestHCQ.kernargs_size, 0x160).cast('I')[:] = array.array('I', TestHCQ.runtime.constbuffer_0)
    # NOTE(review): compute and copy queues are the same class on NV here - confirm this is intended
    TestHCQ.compute_queue = HWQueue
    TestHCQ.copy_queue = HWQueue
| 48 | |
def setUp(self):
  """Synchronize the shared device `TestHCQ.d0` before each test."""
  device = TestHCQ.d0
  device.synchronize()
nothing calls this directly
no test coverage detected