MCPcopy
hub / github.com/tinygrad/tinygrad / custom_data_deps

Function custom_data_deps

test/amd/test_custom_kernel.py:148–162  ·  view source on GitHub ↗
(A:UOp)

Source from the content-addressed store, hash-verified

146 return UOp(Ops.PROGRAM, src=(sink, UOp(Ops.DEVICE, arg="AMD"), UOp(Ops.LINEAR, src=tuple([UOp(Ops.INS, arg=x) for x in insts]))))
147
def custom_data_deps(A:UOp) -> UOp:
  """Assemble a hand-written AMD program UOp named "custom_data_deps" for the flattened `A`.

  The kernel body is a fixed list of eight instructions emitted into a `Kernel`
  and finalized; each finalized instruction is wrapped in an `Ops.INS` UOp under
  a single `Ops.LINEAR` node. The returned `Ops.PROGRAM` UOp has three sources:
  the sink (base of the flattened `A` plus the "lidx0" special), an `Ops.DEVICE`
  UOp with arg "AMD", and that `Ops.LINEAR` node.

  NOTE(review): judging by the mnemonics, the sequence looks like "load pointer,
  load element, add 1.0, store element", with a wait after each load -- confirm
  the exact operand semantics against the instruction helpers / ISA docs.
  """
  flat = A.flatten()
  # special "lidx0" sized to the element count of the flattened input
  lanes = UOp.special(flat.numel(), "lidx0")

  kern = Kernel()
  # Order is significant: instructions are emitted exactly as listed.
  body = (
    s_load_b64(s[0:1], s[0:1], soffset=NULL),           # presumably fetches the buffer address into s[0:1] -- TODO confirm
    s_waitcnt_lgkmcnt(sdst=NULL, simm16=0),             # wait placed right after the scalar load
    v_lshlrev_b32_e32(v[0], 2, v[0]),                   # v[0] shifted left by 2 (presumably index -> 4-byte offset)
    global_load_b32(v[1], v[0], saddr=s[0:1]),
    s_waitcnt_vmcnt(sdst=NULL, simm16=0),               # wait placed right after the global load
    v_add_f32_e32(v[1], 1.0, v[1]),
    global_store_b32(addr=v[0], data=v[1], saddr=s[0:1]),
    s_endpgm(),
  )
  for inst in body:
    kern.emit(inst)
  encoded = kern.finalize()

  sink = UOp.sink(flat.base, lanes, arg=KernelInfo("custom_data_deps"))
  device = UOp(Ops.DEVICE, arg="AMD")
  linear = UOp(Ops.LINEAR, src=tuple(UOp(Ops.INS, arg=inst) for inst in encoded))
  return UOp(Ops.PROGRAM, src=(sink, device, linear))
163
164@unittest.skipUnless(Device.DEFAULT == "AMD", "requires AMD device")
165class TestCustomKernel(unittest.TestCase):

Callers

nothing calls this directly

Calls 9

emitMethod · 0.95
finalizeMethod · 0.95
KernelClass · 0.90
KernelInfoClass · 0.90
UOpClass · 0.90
flattenMethod · 0.80
specialMethod · 0.80
numelMethod · 0.80
sinkMethod · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…