MCPcopy
hub / github.com/tinygrad/tinygrad / _get_runner

Function _get_runner

test/mockgpu/amd/emu.py:2054–2084  ·  view source on GitHub ↗

Build and compile instruction to (prg, runtime). Cached by instruction bytes, with canonical dedup.

(inst_bytes: bytes, arch: str = "rdna3")

Source from the content-addressed store, hash-verified

2052
@functools.cache
def _get_runner(inst_bytes: bytes, arch: str = "rdna3"):
  """Return the compiled ``(prg, runtime)`` pair used to emulate a single instruction.

  Results are memoized twice: ``functools.cache`` keys on the exact ``(inst_bytes, arch)`` arguments, and
  ``_canonical_runner_cache`` lets different encodings share one compiled runner when they have the same
  instruction type and size and agree on the bits selected by the canonical mask.

  Args:
    inst_bytes: raw bytes starting at the instruction to decode; only the first ``inst.size()`` bytes are
      used for canonical matching.
    arch: architecture name passed to ``decode_inst`` and ``_wave_size``.

  Returns:
    A ``(prg, runtime)`` tuple: the program produced by ``to_program`` and the runtime from ``get_runtime``.

  Raises:
    RuntimeError: if neither the instruction's type nor any class in its MRO has a registered handler.
  """
  inst = decode_inst(inst_bytes, arch)
  inst_cls, n_bytes = type(inst), inst.size()
  encoded = int.from_bytes(inst_bytes[:n_bytes], 'little')

  # Reuse an already compiled runner if one matches. The type must match as well as the masked bits,
  # otherwise variants that share an encoding pattern would conflict.
  for cached_cls, cached_base, cached_mask, cached_size, cached_entry in _canonical_runner_cache:
    if inst_cls is not cached_cls or n_bytes != cached_size: continue
    if (encoded & cached_mask) == cached_base: return cached_entry

  # A class is the first entry of its own MRO, so a single walk covers both the exact-type lookup and
  # the base-class fallback needed for _LIT variants.
  handler = next((_INST_HANDLERS[cls] for cls in inst_cls.__mro__ if cls in _INST_HANDLERS), None)
  if handler is None:
    raise RuntimeError(f"[emu] unimplemented instruction type: {type(inst).__name__} {_op_name(inst)}")

  emu_ctx = _Ctx(n_bytes, _wave_size(arch))
  graph = handler(inst, emu_ctx)
  # canonical_mask is queried only after the handler has run on this context
  canon_base, canon_mask, canon_size = emu_ctx.canonical_mask(inst_bytes)
  kernel_name = f"{_op_name(inst).lower()}_{canon_base.to_bytes(canon_size, 'little').hex()}"
  graph = graph.replace(arg=KernelInfo(name=kernel_name)).rtag(1)

  # NOTE: the renderer output is not reproducible because of _MXCSRContext.
  # PROFILE=0 keeps the emulator's instruction runners from polluting profiling.
  with Context(NOOPT=1, CHECK_OOB=0, TUPLE_ORDER=0, EMULATED_DTYPES="", CAPTURE_PROCESS_REPLAY=0, PROFILE=0):
    prg = to_program(graph, Device['CPU'].renderer)
    runtime = get_runtime('CPU', prg)

  # Record the runner so later instructions matching this canonical pattern skip compilation.
  entry = (prg, runtime)
  _canonical_runner_cache.append((inst_cls, canon_base, canon_mask, canon_size, entry))
  return entry
2085
# S_BARRIER opcodes from the ir3 and irc SOPPOp enums.
_BARRIER_OPS = {isa.SOPPOp.S_BARRIER for isa in (ir3, irc)}
# ir4's S_BARRIER_WAIT is included only when that SOPPOp member exists.
if hasattr(ir4.SOPPOp, 'S_BARRIER_WAIT'):
  _BARRIER_OPS.add(ir4.SOPPOp.S_BARRIER_WAIT)

Callers 1

_decode_atFunction · 0.85

Calls 15

decode_instFunction · 0.90
KernelInfoClass · 0.90
ContextClass · 0.90
to_programFunction · 0.90
get_runtimeFunction · 0.90
_op_nameFunction · 0.85
_CtxClass · 0.85
_wave_sizeFunction · 0.85
canonical_maskMethod · 0.80
rtagMethod · 0.80
appendMethod · 0.80
sizeMethod · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…