(self, dev:AMDDevice)
| 452 | return self |
| 453 | |
def _submit(self, dev:AMDDevice):
    """Copy this queue's command stream into ``dev``'s compute ring and ring the doorbell.

    The commands prepared for the bound device (``self._cmds``) are reused when
    ``dev`` is that device; otherwise they are rebuilt for ``dev`` through
    ``self._prep_aql`` using a freshly allocated slice of ``dev.pm4_ibs``.

    The serialized stream is written at the ring's current put position and
    wraps around to offset 0 when it does not fit before the end of the ring.
    ``put_value`` is tracked in 64-byte units: it is scaled by 64 to obtain the
    byte offset and advanced by ``len(stream) // 64``.

    Raises:
        AssertionError: if the serialized stream is not smaller than the ring.
    """
    # Pick the command list that is valid for the target device.
    if dev == self.binded_device:
        packets = self._cmds
    else:
        ib_offset = dev.pm4_ib_alloc.alloc(len(self._q) * 4, 16)
        packets = self._prep_aql(self._q, dev.pm4_ibs.offset(ib_offset))

    # Dispatch packet structs are serialized with bytes(); every other entry is
    # joined as-is.
    payload = b''.join(bytes(pkt) if isinstance(pkt, hsa.hsa_kernel_dispatch_packet_t) else pkt for pkt in packets)

    queue = dev.compute_queue
    ring_size = queue.ring.nbytes
    total = len(payload)
    assert total < ring_size, "submit is too large for the queue"

    # First chunk: from the current write offset up to the end of the ring.
    write_off = (queue.put_value * 64) % ring_size
    head = min(total, ring_size - write_off)
    queue.ring.view(offset=write_off, fmt='B')[:head] = payload[:head]

    # Second chunk (only on wrap-around): the remainder goes to the ring start.
    tail = total - head
    if tail > 0:
        queue.ring.view(offset=0, fmt='B')[:tail] = payload[head:]

    # Publish the new put position; the doorbell receives the last written slot.
    queue.put_value += total // 64
    queue.signal_doorbell(dev, doorbell_value=queue.put_value - 1)
| 464 | |
| 465 | class AMDCopyQueue(HWQueue): |
| 466 | def __init__(self, dev, max_copy_size=0x40000000, queue_idx=0): |
nothing calls this directly
no test coverage detected