(self, wptrs:HCQBuffer)
| 288 | |
| 289 | # Queues the thread-trace stop sequence and copies each SE's regSQ_THREAD_TRACE_WPTR into wptrs (at wptrs.va_addr + se*4); returns self. Magic values from src/amd/common/ac_sqtt.c:ac_sqtt_emit_stop and src/amd/common/ac_sqtt.c:ac_sqtt_emit_wait |
| 290 | def sqtt_stop(self, wptrs:HCQBuffer): |
| 291 | self.memory_barrier() |
| 292 | self.set_grbm() |
| 293 | |
| 294 | # Start shutting everything down: when dev.target[0] == 9 write regSQ_THREAD_TRACE_MODE with mode=0, otherwise clear regCOMPUTE_THREAD_TRACE_ENABLE; a THREAD_TRACE_FINISH event is queued as well |
| 295 | if self.dev.target[0] == 9: self.wreg(self.gc.regSQ_THREAD_TRACE_MODE, mask_cs=1, autoflush_en=1, mode=0) |
| 296 | else: |
| 297 | self.wreg(self.gc.regCOMPUTE_THREAD_TRACE_ENABLE, 0) |
| 298 | self.pkt3(self.pm4.PACKET3_EVENT_WRITE, self.pm4.EVENT_TYPE(self.soc.THREAD_TRACE_FINISH) | self.pm4.EVENT_INDEX(0)) |
| 299 | |
| 300 | # For each SE (se_cnt per XCC, over all dev.xccs; the XCC is selected with pred_exec's xcc_mask and the SE with set_grbm) wait for finish to complete and copy regSQ_THREAD_TRACE_WPTR to know where in the buffer trace data ends |
| 301 | for se in range(self.dev.se_cnt * self.dev.xccs): |
| 302 | with self.pred_exec(xcc_mask=1<<(se // self.dev.se_cnt)): |
| 303 | self.set_grbm(se=se % self.dev.se_cnt, sh=0) |
| 304 | |
| 305 | regstatus = self.gc.regSQ_THREAD_TRACE_STATUS.addr[0] - (self.pm4.PACKET3_SET_UCONFIG_REG_START if self.dev.target[0] == 9 else 0) |
| 306 | if self.dev.target[0] != 9: |
| 307 | self.wait_reg_mem(reg=regstatus, mask=self.gc.regSQ_THREAD_TRACE_STATUS.fields_mask('finish_pending'), op=WAIT_REG_MEM_FUNCTION_EQ, value=0) |
| 308 | self.sqtt_config(tracing=False) |
| 309 | self.wait_reg_mem(reg=regstatus, mask=self.gc.regSQ_THREAD_TRACE_STATUS.fields_mask('busy'), op=WAIT_REG_MEM_FUNCTION_EQ, value=0) |
| 310 | self.pkt3(self.pm4.PACKET3_EVENT_WRITE, self.pm4.EVENT_TYPE(self.soc.CS_PARTIAL_FLUSH) | self.pm4.EVENT_INDEX(EVENT_INDEX_PARTIAL_FLUSH)) |
| 311 | |
| 312 | # Copy WPTR to memory (src_sel = perf, dst_sel = tc_l2, wr_confirm = True); the destination is this SE's slot at wptrs.va_addr + se*4 |
| 313 | self.pkt3(self.pm4.PACKET3_COPY_DATA, 1 << 20 | 2 << 8 | 4, self.gc.regSQ_THREAD_TRACE_WPTR.addr[0], 0, *data64_le(wptrs.va_addr+(se*4))) |
| 314 | |
| 315 | self.set_grbm() |
| 316 | if self.dev.target[0] != 9: self.spi_config(tracing=False) |
| 317 | self.memory_barrier() |
| 318 | return self |
| 319 | |
| 320 | def exec(self, prg:AMDProgram, args_state:CLikeArgsState, global_size:tuple[sint, ...], local_size:tuple[sint, ...]): |
| 321 | self.bind_args_state(args_state) |
no test coverage detected