MCPcopy Index your code
hub / github.com/tinygrad/tinygrad / __call__

Method __call__

tinygrad/runtime/ops_nv.py:326–337  ·  view source on GitHub ↗
(self, *bufs, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int|None, ...]=(),
               wait=False, timeout:int|None=None)

Source from the content-addressed store, hash-verified

324 start_off += (sz if typ == 0x4 else 0) + 4
325
def __call__(self, *bufs, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int|None, ...]=(),
             wait=False, timeout:int|None=None):
  """Check the requested launch configuration, delegate the launch to the parent class, then collect PMA data if enabled.

  Args:
    *bufs: positional buffers, forwarded unchanged to the parent ``__call__``.
    global_size: three global launch dimensions.
    local_size: three local launch dimensions.
    vals: extra values, forwarded unchanged to the parent ``__call__``.
    wait: forwarded unchanged to the parent ``__call__``.
    timeout: forwarded unchanged to the parent ``__call__``.

  Returns:
    Whatever the parent ``__call__`` returns.

  Raises:
    RuntimeError: if the product of ``local_size`` exceeds 1024 or ``self.max_threads``, if
      ``self.lcmem_usage`` exceeds the device's ``slm_per_thread``, or if any single dimension
      exceeds its per-axis cap.
  """
  # Resource check: total local threads and local memory usage against the device budget.
  thread_count = prod(local_size)
  too_many_threads = thread_count > 1024 or self.max_threads < thread_count
  if too_many_threads or self.lcmem_usage > cast(NVDevice, self.dev).slm_per_thread:
    raise RuntimeError(f"Too many resources requested for launch, {prod(local_size)=}, {self.max_threads=}")

  # Per-axis caps, global dims checked before local dims.
  # NOTE(review): the constants look like hardware grid/block limits -- confirm against the device docs.
  axis_caps = ((global_size, (2147483647, 65535, 65535)), (local_size, (1024, 1024, 64)))
  for dims, caps in axis_caps:
    if any(dim > cap for dim, cap in zip(dims, caps)):
      raise RuntimeError(f"Invalid global/local dims {global_size=}, {local_size=}")

  result = super().__call__(*bufs, global_size=global_size, local_size=local_size, vals=vals, wait=wait, timeout=timeout)
  if not self.dev.pma_enabled:
    return result

  # PMA is enabled: synchronize the device before reading back, and record an event only for a non-empty blob.
  self.dev.synchronize()
  pma_blob = self.dev._prof_readback()
  if pma_blob:
    Compiled.profile_events += [ProfilePMAEvent(self.dev.device, self.name, pma_blob, self.dev.prof_exec_counter)]
  return result
338
339class NVAllocator(HCQAllocator['NVDevice']):
340 def _alloc(self, size:int, options:BufferSpec) -> HCQBuffer:

Callers

nothing calls this directly

Calls 5

prod — Function · 0.90
cast — Function · 0.85
ProfilePMAEvent — Class · 0.85
_prof_readback — Method · 0.80
synchronize — Method · 0.45

Tested by

no test coverage detected