MCPcopy
hub / github.com/tinygrad/tinygrad / sqtt_stop

Method sqtt_stop

tinygrad/runtime/ops_amd.py:290–318  ·  view source on GitHub ↗
(self, wptrs:HCQBuffer)

Source from the content-addressed store, hash-verified

288
289 # Magic values from src/amd/common/ac_sqtt.c:ac_sqtt_emit_stop and src/amd/common/ac_sqtt.c:ac_sqtt_emit_wait
def sqtt_stop(self, wptrs:HCQBuffer):
  """
  Issue the register writes, waits and PM4 packets that stop thread tracing and save each shader engine's trace write pointer.

  Steps, in order: barrier and `set_grbm()`; request shutdown (register differs between `target[0] == 9` and every other target);
  for every shader engine on every XCC, wait on `regSQ_THREAD_TRACE_STATUS` and copy `regSQ_THREAD_TRACE_WPTR` into `wptrs`;
  finally `set_grbm()` again, turn off the SPI tracing config on non-9 targets, and barrier.

  NOTE(review): the block nesting below was reconstructed from a listing that had lost its indentation. In particular
  `sqtt_config(tracing=False)` is assumed to belong to the `!= 9` branch -- confirm against the upstream file.

  Args:
    wptrs: destination buffer; the write pointer of flat shader-engine index `se` is copied to `wptrs.va_addr + se*4`.

  Returns:
    `self`.
  """
  self.memory_barrier()
  # No arguments here, explicit se/sh inside the loop below -- presumably the no-arg form selects broadcast; verify in set_grbm.
  self.set_grbm()

  # Start shutting everything down
  # Target 9: write the trace MODE register with mode=0 (mask_cs and autoflush_en stay set to 1).
  if self.dev.target[0] == 9: self.wreg(self.gc.regSQ_THREAD_TRACE_MODE, mask_cs=1, autoflush_en=1, mode=0)
  else:
    # Other targets: clear COMPUTE_THREAD_TRACE_ENABLE, then emit a THREAD_TRACE_FINISH event.
    self.wreg(self.gc.regCOMPUTE_THREAD_TRACE_ENABLE, 0)
    self.pkt3(self.pm4.PACKET3_EVENT_WRITE, self.pm4.EVENT_TYPE(self.soc.THREAD_TRACE_FINISH) | self.pm4.EVENT_INDEX(0))

  # For each SE wait for finish to complete and copy regSQ_THREAD_TRACE_WPTR to know where in the buffer trace data ends
  # `se` is a flat index over all XCCs: `se // se_cnt` is the XCC number, `se % se_cnt` is the SE within that XCC.
  for se in range(self.dev.se_cnt * self.dev.xccs):
    # Everything in this `with` is issued under a mask that has only the owning XCC's bit set.
    with self.pred_exec(xcc_mask=1<<(se // self.dev.se_cnt)):
      self.set_grbm(se=se % self.dev.se_cnt, sh=0)

      # On target 9 the status register address is rebased by PACKET3_SET_UCONFIG_REG_START; other targets use it unchanged.
      regstatus = self.gc.regSQ_THREAD_TRACE_STATUS.addr[0] - (self.pm4.PACKET3_SET_UCONFIG_REG_START if self.dev.target[0] == 9 else 0)
      if self.dev.target[0] != 9:
        # Non-9 targets: wait until the `finish_pending` bits read 0, then write the trace config with tracing disabled.
        self.wait_reg_mem(reg=regstatus, mask=self.gc.regSQ_THREAD_TRACE_STATUS.fields_mask('finish_pending'), op=WAIT_REG_MEM_FUNCTION_EQ, value=0)
        self.sqtt_config(tracing=False)
      # All targets: wait until the `busy` bits read 0, then emit a CS_PARTIAL_FLUSH event.
      self.wait_reg_mem(reg=regstatus, mask=self.gc.regSQ_THREAD_TRACE_STATUS.fields_mask('busy'), op=WAIT_REG_MEM_FUNCTION_EQ, value=0)
      self.pkt3(self.pm4.PACKET3_EVENT_WRITE, self.pm4.EVENT_TYPE(self.soc.CS_PARTIAL_FLUSH) | self.pm4.EVENT_INDEX(EVENT_INDEX_PARTIAL_FLUSH))

      # Copy WPTR to memory (src_sel = perf, dst_sel = tc_l2, wr_confirm = True)
      # Destination slots are spaced 4 bytes apart, indexed by the flat `se`.
      self.pkt3(self.pm4.PACKET3_COPY_DATA, 1 << 20 | 2 << 8 | 4, self.gc.regSQ_THREAD_TRACE_WPTR.addr[0], 0, *data64_le(wptrs.va_addr+(se*4)))

  # Undo the per-SE selection made in the loop, then finish the teardown.
  self.set_grbm()
  if self.dev.target[0] != 9: self.spi_config(tracing=False)
  self.memory_barrier()
  return self
319
320 def exec(self, prg:AMDProgram, args_state:CLikeArgsState, global_size:tuple[sint, ...], local_size:tuple[sint, ...]):
321 self.bind_args_state(args_state)

Callers 1

__call__ · Method · 0.80

Calls 10

memory_barrier · Method · 0.95
set_grbm · Method · 0.95
wreg · Method · 0.95
pkt3 · Method · 0.95
pred_exec · Method · 0.95
wait_reg_mem · Method · 0.95
sqtt_config · Method · 0.95
spi_config · Method · 0.95
data64_le · Function · 0.90
fields_mask · Method · 0.80

Tested by

no test coverage detected