MCPcopy
hub / github.com/tinygrad/tinygrad / pmc_start

Method pmc_start

tinygrad/runtime/ops_amd.py:151–174  ·  view source on GitHub ↗
(self, counters)

Source from the content-addressed store, hash-verified

149 return self
150
def pmc_start(self, counters):
  """Program the requested performance counters and enable counting.

  `counters` is an iterable of `(name, block, idx)` tuples. `block` must be one of "GRBM", "GL2C", "TCC" or "SQ"; `idx` is written to the
  `perf_sel` field of the next unused `reg{block}_PERFCOUNTER<n>_SELECT` register of that block. For every counter a `PMCSample` is appended
  to `self.dev.pmc_sched`, recording the block layout, the record's start offset and size, and the name of the register to sample.

  Returns the result of `self.pmc_reset_counters(en=True)`.

  Raises:
    KeyError: `block` is not one of the supported block names.
    RuntimeError: the block has no `*_SELECT` register left for another counter.
  """
  self.pmc_reset_counters(en=False)
  # gfx9 takes the vmid mask in CTRL itself; other targets get it through the separate CTRL2 register below
  self.wreg(self.gc.regSQ_PERFCOUNTER_CTRL, cs_en=1, ps_en=1, gs_en=1, hs_en=1, **({'vmid_mask':0xffff} if (gfx9:=self.dev.target[0] == 9) else {}))
  if not gfx9: self.wreg(self.gc.regSQ_PERFCOUNTER_CTRL2, force_en=1, vmid_en=0xffff)

  # (inst_cnt, se_cnt, sa_cnt, wgp_cnt) per block. Loop-invariant, so it is built once instead of once per counter.
  # sq block on non-gfx9 targets goes down to wgps
  sq_layout = (1, self.dev.se_cnt) + ((1, 1) if gfx9 else (2, self.dev.iface.props['cu_per_simd_array'] // 2))
  block_layout = {"GRBM": (1, 1, 1, 1), "GL2C": (32, 1, 1, 1), "TCC": (16, 1, 1, 1), "SQ": sq_layout}

  end_off = 0
  block2pid:dict[str, itertools.count] = collections.defaultdict(itertools.count)
  for name,block,idx in counters:
    inst_cnt, se_cnt, sa_cnt, wgp_cnt = block_layout[block]
    # one record holds 8 units per (xcc, instance, se, sa, wgp) combination -- presumably 8 bytes per value, confirm against pmc_read
    rec_size = prod((self.dev.xccs, inst_cnt, se_cnt, sa_cnt, wgp_cnt)) * 8
    rec_off, end_off = end_off, end_off + rec_size

    # non-gfx9 SQ only uses even-numbered *_SELECT registers, so counter n is selected through register 2n
    regsample = f'reg{block}_PERFCOUNTER{(pcid:=next(block2pid[block]))}'
    regsel_name = f'reg{block}_PERFCOUNTER{pcid*2 if not gfx9 and block == "SQ" else pcid}_SELECT'
    if (regsel:=getattr(self.gc, regsel_name, None)) is None:
      # report the register that was actually looked up, not the sample register derived from the counter index
      raise RuntimeError(f'{block} is out of perfcounter registers: ({regsel_name} is not found)')

    self.wreg(regsel, perf_sel=idx, **({'simd_mask':0xf, 'sqc_bank_mask':0xf, 'sqc_client_mask':0xf} if gfx9 and block == "SQ" else {}))
    self.dev.pmc_sched.append(PMCSample(name, block, self.dev.xccs, inst_cnt, se_cnt, sa_cnt, wgp_cnt, rec_off, rec_size, regsample))

  if gfx9: self.wreg(self.gc.regSQ_PERFCOUNTER_MASK, sh0_mask=0xffff, sh1_mask=0xffff)
  self.wreg(self.gc.regCOMPUTE_PERFCOUNT_ENABLE, 1)
  return self.pmc_reset_counters(en=True)
175
176 def pmc_read(self, buf, sched):
177 self.set_grbm()

Callers 1

__init__ · Method · 0.80

Calls 6

pmc_reset_counters · Method · 0.95
wreg · Method · 0.95
prod · Function · 0.90
PMCSample · Class · 0.85
append · Method · 0.80
count · Method · 0.45

Tested by

no test coverage detected