(self)
| 193 | |
class AttentionHooks(HookCollection):
    """Hook collection with one ``FwdBwdHooks`` sub-hook per registered name.

    On construction, the following sub-hook names are registered, in this
    order, each with its own new ``FwdBwdHooks`` instance:
    ``q``, ``k``, ``v``, ``qk_logits``, ``qk_softmax_denominator``,
    ``qk_probs`` and ``v_out``.
    """

    def __init__(self):
        super().__init__()
        subhook_names = (
            "q",
            "k",
            "v",
            "qk_logits",
            "qk_softmax_denominator",
            "qk_probs",
            "v_out",  # pre-final projection
        )
        # A separate FwdBwdHooks object is created for every name so that no
        # two sub-hooks share an instance.
        for subhook_name in subhook_names:
            self.add_subhooks(subhook_name, FwdBwdHooks())
| 204 | |
| 205 | |
| 206 | class ResidualStreamTorsoHooks(HookCollection): |
nothing calls this directly
no test coverage detected