MCPcopy
hub / github.com/tinygrad/tinygrad / pmc_read

Method pmc_read

tinygrad/runtime/ops_amd.py:176–195  ·  view source on GitHub ↗
(self, buf, sched)

Source from the content-addressed store, hash-verified

174 return self.pmc_reset_counters(en=True)
175
def pmc_read(self, buf, sched):
  """Queue commands that sample the performance counters and copy them into `buf`.

  The GRBM selection is first reset via `set_grbm()` with no arguments, then the perfmon control
  register is written with `perfmon_state=1, perfmon_sample_enable=1`. For every entry of `sched`,
  one 8-wide slot (starting at `entry.off`) is reserved per (xcc, inst, se, sa, wgp) combination;
  slots of inactive WGPs are reserved but left unwritten. The `<regsample>_LO` register is copied
  to the start of the slot and `<regsample>_HI`, when `self.gc` has it, to slot + 4.

  Args:
    buf: destination buffer; only `buf.va_addr` is read here.
    sched: iterable of entries exposing `off`, `xcc`, `inst`, `se`, `sa`, `wgp` and `regsample`.

  Returns:
    Whatever `self.pmc_reset_counters(en=True)` returns.
  """
  self.set_grbm()
  perfmon_reg = self.gc.regCP_PERFMON_CNTL if self.dev.target[0] <= 11 else self.gc.regCP_PERFMON_CNTL_1
  self.wreg(perfmon_reg, perfmon_state=1, perfmon_sample_enable=1)

  # COPY_DATA control word (src_sel = perf, dst_sel = tc_l2)
  copy_ctl = (2 << 8) | 4

  for entry in sched:
    slots = itertools.count(entry.off, step=8)

    for xcc_id in range(entry.xcc):
      with self.pred_exec(xcc_mask=1 << xcc_id):
        for inst, se, sa, wgp in itertools.product(range(entry.inst), range(entry.se), range(entry.sa), range(entry.wgp)):
          # The slot is consumed before the activity check so that skipped WGPs still occupy their place in the layout.
          slot = next(slots)
          if entry.wgp > 1 and not self.dev.iface.is_wgp_active(xcc_id, se, sa, wgp): continue

          # Select by instance when the entry has several, otherwise by se (plus sh/wgp unless target[0] == 9).
          if entry.inst > 1:
            grbm_sel = {'instance': inst}
          elif self.dev.target[0] != 9:
            grbm_sel = {'se': se, 'sh': sa, 'wgp': wgp}
          else:
            grbm_sel = {'se': se}
          self.set_grbm(**grbm_sel)

          # Copy counter to memory: low half always, high half only if the register exists.
          lo_reg = getattr(self.gc, f'{entry.regsample}_LO')
          hi_reg = getattr(self.gc, f'{entry.regsample}_HI', None)
          self.pkt3(self.pm4.PACKET3_COPY_DATA, copy_ctl, lo_reg.addr[0], 0, *data64_le(buf.va_addr+slot))
          if hi_reg is not None:
            self.pkt3(self.pm4.PACKET3_COPY_DATA, copy_ctl, hi_reg.addr[0], 0, *data64_le(buf.va_addr+slot+4))

  return self.pmc_reset_counters(en=True)
196
197 ### SQTT ###
198

Callers 1

__call__Method · 0.80

Calls 8

set_grbmMethod · 0.95
wregMethod · 0.95
pred_execMethod · 0.95
pkt3Method · 0.95
pmc_reset_countersMethod · 0.95
data64_leFunction · 0.90
countMethod · 0.45
is_wgp_activeMethod · 0.45

Tested by

no test coverage detected