MCPcopy Index your code
hub / github.com/tinygrad/tinygrad / _submit

Method _submit

tinygrad/runtime/ops_amd.py:454–463  ·  view source on GitHub ↗
(self, dev:AMDDevice)

Source from the content-addressed store, hash-verified

452 return self
453
def _submit(self, dev:AMDDevice):
  """Write this queue's commands into `dev`'s compute ring and signal its doorbell.

  When `dev` is the device this queue is bound to, the already prepared
  `self._cmds` are submitted as-is. Otherwise the commands are rebuilt with
  `_prep_aql` from `self._q`, against a region of `dev.pm4_ibs` obtained from
  `dev.pm4_ib_alloc`.

  The serialized commands are copied into the ring starting at the current
  put position, wrapping to the start of the ring when they do not fit in the
  space left before its end. `put_value` is then advanced and the doorbell is
  signalled with the last written slot index.
  """
  if dev == self.binded_device: packets = self._cmds
  else:
    # Different device: rebuild the packets against a fresh allocation on `dev`.
    # NOTE(review): the second alloc argument (16) is presumably an alignment -- confirm.
    ib_offset = dev.pm4_ib_alloc.alloc(len(self._q) * 4, 16)
    packets = self._prep_aql(self._q, dev.pm4_ibs.offset(ib_offset))

  # Dispatch packet structs are serialized with bytes(); every other entry is joined as-is.
  payload = b''.join(pkt if not isinstance(pkt, hsa.hsa_kernel_dispatch_packet_t) else bytes(pkt) for pkt in packets)

  queue = dev.compute_queue
  ring_nbytes = queue.ring.nbytes
  assert len(payload) < ring_nbytes, "submit is too large for the queue"

  # put_value is kept in units of 64 bytes; convert it to a byte offset inside the ring.
  write_off = (queue.put_value * 64) % ring_nbytes

  # First part: as much as fits between the write offset and the end of the ring.
  head_len = min(len(payload), ring_nbytes - write_off)
  queue.ring.view(offset=write_off, fmt='B')[:head_len] = payload[:head_len]

  # Remainder (if any) wraps around to the beginning of the ring.
  wrapped_len = len(payload) - head_len
  if wrapped_len > 0: queue.ring.view(offset=0, fmt='B')[:wrapped_len] = payload[head_len:]

  queue.put_value += len(payload) // 64
  # The doorbell is given the index of the last slot written, hence the -1.
  queue.signal_doorbell(dev, doorbell_value=queue.put_value-1)
464
465class AMDCopyQueue(HWQueue):
466 def __init__(self, dev, max_copy_size=0x40000000, queue_idx=0):

Callers

nothing calls this directly

Calls 5

_prep_aqlMethod · 0.95
signal_doorbellMethod · 0.80
offsetMethod · 0.45
allocMethod · 0.45
viewMethod · 0.45

Tested by

no test coverage detected