Build and compile an instruction into a (prg, runtime) pair. Results are cached by instruction bytes, and instructions matching an already-built canonical pattern reuse its entry.
(inst_bytes: bytes, arch: str = "rdna3")
| 2052 | |
@functools.cache
def _get_runner(inst_bytes: bytes, arch: str = "rdna3"):
  """Return the compiled ``(prg, runtime)`` pair used to emulate one instruction.

  The instruction is decoded from ``inst_bytes`` for ``arch``. If an entry in
  ``_canonical_runner_cache`` has the same instruction class and size, and the
  instruction's masked encoding equals that entry's base, the entry is returned
  as-is. Otherwise a handler is looked up in ``_INST_HANDLERS``, its output is
  compiled for the CPU device, and the result is recorded in
  ``_canonical_runner_cache`` before being returned. Calls are additionally
  memoized on ``(inst_bytes, arch)`` by ``functools.cache``.

  Raises:
    RuntimeError: no handler is registered for the instruction's class or any
      class in its MRO.
  """
  decoded = decode_inst(inst_bytes, arch)
  decoded_cls = type(decoded)
  nbytes = decoded.size()
  encoding = int.from_bytes(inst_bytes[:nbytes], 'little')

  # Reuse an existing runner when this encoding fits a cached canonical pattern.
  # The class must match as well, to avoid conflicts between instruction variants.
  for cached_cls, cached_base, cached_mask, cached_size, cached_entry in _canonical_runner_cache:
    if decoded_cls is not cached_cls: continue
    if nbytes != cached_size: continue
    if (encoding & cached_mask) == cached_base: return cached_entry

  # The MRO starts with the class itself, so one walk covers both the exact
  # match and the base-class fallback (used for _LIT variants).
  handler = next((_INST_HANDLERS[cls] for cls in decoded_cls.__mro__ if cls in _INST_HANDLERS), None)
  if handler is None:
    raise RuntimeError(f"[emu] unimplemented instruction type: {decoded_cls.__name__} {_op_name(decoded)}")

  ctx = _Ctx(nbytes, _wave_size(arch))
  sink = handler(decoded, ctx)
  # canonical_mask is queried only after the handler has run against ctx.
  base, mask, size = ctx.canonical_mask(inst_bytes)
  op_prefix = _op_name(decoded).lower()
  kernel_name = f"{op_prefix}_{base.to_bytes(size, 'little').hex()}"
  sink = sink.replace(arg=KernelInfo(name=kernel_name)).rtag(1)

  # NOTE: renderer output is not reproducible because of _MXCSRContext.
  # PROFILE=0 prevents emulator instruction runners from polluting profiling.
  build_flags = dict(NOOPT=1, CHECK_OOB=0, TUPLE_ORDER=0, EMULATED_DTYPES="", CAPTURE_PROCESS_REPLAY=0, PROFILE=0)
  with Context(**build_flags):
    prg = to_program(sink, Device['CPU'].renderer)
    runtime = get_runtime('CPU', prg)

  # Record the pattern so later instructions that match it skip the rebuild.
  _canonical_runner_cache.append((decoded_cls, base, mask, size, (prg, runtime)))
  return prg, runtime
| 2085 | |
# SOPP opcodes collected as barrier ops: S_BARRIER from ir3 and irc, plus
# ir4's S_BARRIER_WAIT when that enum defines it.
_BARRIER_OPS = {ir3.SOPPOp.S_BARRIER, irc.SOPPOp.S_BARRIER}
if hasattr(ir4.SOPPOp, 'S_BARRIER_WAIT'):
  _BARRIER_OPS.add(ir4.SOPPOp.S_BARRIER_WAIT)
no test coverage detected
searching dependent graphs…