(self, dev, name:str, lib:bytes, runtimevars:dict[str, int]|None=None, **kwargs)
| 78 | except OSError: pass |
| 79 | |
def __init__(self, dev, name:str, lib:bytes, runtimevars:dict[str, int]|None=None, **kwargs):
    """Copy compiled machine code into executable memory and wrap its start address as a callable.

    Args:
        dev: Owning device; only ``dev.renderer`` is inspected here, to detect the LVP renderer.
        name: Program name, forwarded to the base-class initializer.
        lib: Raw bytes copied into the executable region. On the non-Windows path with the
            LVP renderer they are first passed through ``jit_loader``.
        runtimevars: Optional mapping stored on ``self.runtimevars``; a falsy value becomes ``{}``.
        **kwargs: Accepted but not used by this initializer.

    Raises:
        MemoryError: On Windows, if ``VirtualAlloc`` fails to provide the executable region.
    """
    self.runtimevars = runtimevars or {}

    LVP = isinstance(dev.renderer, LVPRenderer)
    if sys.platform == "win32": # mypy doesn't understand when WIN is used here
        PAGE_EXECUTE_READWRITE, MEM_COMMIT, MEM_RESERVE = 0x40, 0x1000, 0x2000
        ctypes.windll.kernel32.VirtualAlloc.restype = ctypes.c_void_p
        self.mem = ctypes.windll.kernel32.VirtualAlloc(ctypes.c_void_p(0), ctypes.c_size_t(len(lib)),
                                                       MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE)
        # With a c_void_p restype a NULL return arrives as None. Fail loudly here instead of handing a
        # null pointer to memmove. self.mem is assigned before the check so the attribute always exists.
        if not self.mem:
            raise MemoryError(f"VirtualAlloc failed to allocate {len(lib)} bytes of executable memory")
        ctypes.memmove(self.mem, lib, len(lib))
        ctypes.windll.kernel32.GetCurrentProcess.restype = ctypes.c_void_p
        proc = ctypes.windll.kernel32.GetCurrentProcess()
        # Flush the instruction cache over the freshly written range before it is executed.
        ctypes.windll.kernel32.FlushInstructionCache(ctypes.c_void_p(proc), ctypes.c_void_p(self.mem), ctypes.c_size_t(len(lib)))
        self.fxn = ctypes.CFUNCTYPE(None)(self.mem)
    else:
        # On apple silicon with SPRR enabled (it always is in macos) RWX pages are unrepresentable: https://blog.svenpeter.dev/posts/m1_sprr_gxf/
        # MAP_JIT allows us to easily flip pages from RW- to R-X and vice versa. It is a noop on intel cpus. (man pthread_jit_write_protect_np)
        self.mem = mmap.mmap(-1, len(lib), mmap.MAP_ANON|mmap.MAP_PRIVATE|(MAP_JIT if OSX else 0),
                             mmap.PROT_READ|mmap.PROT_WRITE|mmap.PROT_EXEC)

        # The write below is bracketed by the write-protect toggle on macOS; keep this order.
        if OSX: unwrap(CPUProgram.rt_lib).pthread_jit_write_protect_np(False)
        # The mapping's base address is passed to jit_loader as `base`.
        # NOTE(review): the mapping was sized from the original `lib`; presumably jit_loader's output is not larger - confirm.
        if LVP: lib = jit_loader(lib, base=ctypes.addressof(ctypes.c_void_p.from_buffer(self.mem)), link_libs=['m'])
        self.mem.write(lib)
        if OSX: unwrap(CPUProgram.rt_lib).pthread_jit_write_protect_np(True)

        # __clear_cache isn't a normal libc function, but a compiler support routine found in libgcc_s for gcc and compiler-rt for clang.
        # libgcc_s comes as shared library but compiler-rt is only a bunch of static library archives which we can't directly load, but fortunately
        # it somehow found its way into libSystem on macos (likely because it used __builtin_clear_cache) and libgcc_s is ~always present on linux
        # Using ["name"] instead of .name because otherwise name is getting mangled: https://docs.python.org/3.12/reference/expressions.html#index-5
        if CPUProgram.rt_lib is not None:
            CPUProgram.rt_lib["__clear_cache"](ctypes.c_void_p(mv_address(self.mem)), ctypes.c_void_p(mv_address(self.mem) + len(lib)))
        else:
            # msync should be a universal POSIX way to do this
            from tinygrad.runtime.autogen import libc
            libc.msync(ctypes.c_void_p(mv_address(self.mem)), len(lib), libc.MS_SYNC | libc.MS_INVALIDATE)

        self.fxn = ctypes.CFUNCTYPE(None)(mv_address(self.mem))

    # The args-state class and kernarg allocation size both depend on whether the LVP renderer is in use.
    super().__init__(LVPArgsState if LVP else HCQArgsState, dev, name, kernargs_alloc_size=12+256 if LVP else 0)
| 117 | |
| 118 | @suppress_finalizing |
| 119 | def __del__(self): |
nothing calls this directly
no test coverage detected