(mat, t:Tensor, dims:int)
| 67 | |
| 68 | # winograd conv 3 kernel f(4x4,3x3) see: http://arxiv.org/abs/1509.09308 |
def _apply_winograd_matrix(mat, t:Tensor, dims:int) -> Tensor:
  # Computes mat_1 @ mat_2 @ ... @ t using foldable constants, where mat_i acts on t along dimension i
  # (roughly kron(mat, mat, ...) @ t). Returns the resulting Tensor.
  lead_shape, trail_shape = t.shape[:dims], t.shape[dims:]
  # Add the output dims: insert `dims` singleton axes after the first `dims` axes, then expand each to len(mat).
  # The order reshape -> expand -> arithmetic is required by the realize-before-expand rule in lazy.py.
  expanded = t.reshape(lead_shape + (1,) * dims + trail_shape).expand(lead_shape + (len(mat),) * dims + trail_shape)
  # One set of precomputed mat columns per dim; the product over a choice of one column per dim
  # yields a column of kron(mat, mat, ...).
  matcols = _get_winograd_matcols(mat, dims, expanded.shape[dims:], expanded.device, expanded.dtype)

  def _weighted_slice(index):
    # Scale the slice of `expanded` selected by `index` by the matching stacked kron column,
    # so each element of t contributes through exactly one view.
    coeff = prod(col[i] for col, i in zip(matcols, index))
    return coeff * expanded[index]

  # Accumulate over every index combination along the `dims` input axes (len(mat[0]) choices per axis).
  ret = sum(map(_weighted_slice, itertools.product(range(len(mat[0])), repeat=dims)))
  assert isinstance(ret, Tensor), "sum didn't return a Tensor"
  return ret
| 79 | |
| 80 | class Tensor(OpMixin): |
| 81 | """ |
no test coverage detected
searching dependent graphs…