| 7 | PSEUDO_OPS = {Ops.CONST, Ops.NOOP, Ops.AFTER, Ops.BARRIER, Ops.GROUP} |
| 8 | |
| 9 | class LinearScanRegallocContext: |
| 10 | # returns the uop that defines the virtual register |
| 11 | def vdef(self, v:Register) -> UOp: return self.uops[self.live_range[v][0]] |
| 12 | def __init__(self, uops:list[UOp], ren:ISARenderer): |
| 13 | self.uops = uops |
| 14 | self.ren = ren |
| 15 | self.idx = itertools.count() |
| 16 | # the label associated with each loop NOTE: this is only used post regalloc and should be removed |
| 17 | self.loop_label: dict[UOp, str] = {} |
| 18 | |
| 19 | # compute live ranges |
| 20 | self.live_range: dict[Register, list[int]] = {} |
| 21 | lr = self.live_range |
| 22 | ranges: list[Register] = [] |
| 23 | for i,u in enumerate(reversed(uops)): |
| 24 | if u.op in PSEUDO_OPS: continue |
| 25 | defs = u.tag if isinstance(u.tag, tuple) else () |
| 26 | for v in defs + tuple(s.reg for s in dedup(u.src)): |
| 27 | if isinstance(v, Register): lr.setdefault(v, []).insert(0, len(uops) - 1 - i) |
| 28 | for v in defs: |
| 29 | if v in lr and (n:=max((lr[rng][-1] for rng in ranges if lr[rng][0] <= lr[v][-1] < lr[rng][-1]), default=None)): lr[v].append(n) |
| 30 | if u.op is Ops.RANGE: ranges.append(u.reg) |
| 31 | |
| 32 | # allocate registers |
| 33 | self.stack_size: int = 0 |
| 34 | self.locals: dict[UOp, UOp] = {} |
| 35 | self.spills: dict[Register, UOp] = {} # mapping from virtual to stack slot |
| 36 | self.reals: dict[int, dict[Register, Register]] = {} # mapping from virtual to real at each program point |
| 37 | self.insert_before: dict[int, list[tuple[Register, Register]]] = {} # fills to be inserted at each program point |
| 38 | live: dict[Register, Register] = {} # mapping from virtual to real that's currently assigned to it |
| 39 | live_ins: list[dict[Register, Register]] = [] # mapping from virtual to real at loop entry |
| 40 | |
| 41 | def alloc(cons:tuple[Register, ...], i:int) -> Register: |
| 42 | live_inv = {v:k for k,v in live.items()} |
| 43 | # allocate the best register. Registers not in live or not used again are free and have priority, |
| 44 | # otherwise pick the one with the furthest next use. Regs that appear first in cons have priority in case of a tie |
| 45 | reg,vreg = max(((r,live_inv.get(r)) for r in cons), |
| 46 | key=lambda rv: next((j-i for j in ([] if rv[1] is None else lr[rv[1]]) if j >= i), len(uops))) |
| 47 | return live.pop(vreg) if vreg is not None else reg |
| 48 | |
| 49 | # assign register to spilled virtual and record load to be emitted before current uop, also assign it a stack slot |
| 50 | def fill(v:Register, i:int, cons:tuple[Register, ...]|None=None) -> Register: |
| 51 | if v not in self.spills: |
| 52 | dt = self.vdef(v).dtype |
| 53 | sz = dt.scalar().itemsize * dt.count if not isinstance(dt, PtrDType) else 8 |
| 54 | offset = self.stack_size + (sz - self.stack_size % sz) % sz |
| 55 | self.spills[v] = UOp.const(dtypes.int32, offset) |
| 56 | self.stack_size = offset + sz |
| 57 | r = alloc(cons if cons is not None else v.cons, i) |
| 58 | self.insert_before.setdefault(i, []).append((v, r)) |
| 59 | return r |
| 60 | |
| 61 | for i,u in enumerate(uops): |
| 62 | if u.op in PSEUDO_OPS: continue |
| 63 | # allocate uses |
| 64 | for s in u.src: |
| 65 | # HACK: cause of later hacks to lower range |
| 66 | if u.op is Ops.END: continue |
no outgoing calls
no test coverage detected
searching dependent graphs…