(adj, c_func_name: str, device="cpu", options=None, forward_only=False, reverse_only=False)
| 5016 | |
| 5017 | |
| 5018 | def codegen_func(adj, c_func_name: str, device="cpu", options=None, forward_only=False, reverse_only=False): |
| 5019 | if options is None: |
| 5020 | options = {} |
| 5021 | |
| 5022 | # Build line directive for function definition (subtract 1 to account for 1-indexing of AST line numbers) |
| 5023 | # This is used as a catch-all C-to-Python source line mapping for any code that does not have |
| 5024 | # a direct mapping to a Python source line. |
| 5025 | func_line_directive = "" |
| 5026 | if line_directive := adj.get_line_directive("", adj.fun_def_lineno - 1): |
| 5027 | func_line_directive = f"{line_directive}\n" |
| 5028 | |
| 5029 | # forward header |
| 5030 | if adj.return_var is not None and len(adj.return_var) == 1: |
| 5031 | return_type = adj.return_var[0].ctype() |
| 5032 | else: |
| 5033 | return_type = "void" |
| 5034 | |
| 5035 | has_multiple_outputs = adj.return_var is not None and len(adj.return_var) != 1 |
| 5036 | |
| 5037 | forward_args = [] |
| 5038 | reverse_args = [] |
| 5039 | |
| 5040 | # Tile parameters are emitted as C++ template parameters so that the |
| 5041 | # same @wp.func can accept tiles with any storage type (register or |
| 5042 | # shared) without requiring separate overloads. The Python-level tile |
| 5043 | # annotation (e.g. wp.tile[float, M, N]) defaults to |
| 5044 | # register storage, but at the call site the tile may actually live in |
| 5045 | # shared memory. By generating ``template<typename tile_t>`` instead |
| 5046 | # of the concrete ``tile_register_t<...>`` type, C++ template argument |
| 5047 | # deduction resolves the correct storage type automatically. |
| 5048 | # |
| 5049 | # Tile parameters are passed by non-const reference (not by value) |
| 5050 | # for two reasons: (1) owning shared tiles (tile_shared_t with |
| 5051 | # Owner=true) cannot be copied (static_assert in copy constructor), |
| 5052 | # and (2) adjoint built-ins like adj_tile_sum() expect non-const |
| 5053 | # Tile& parameters. |
| 5054 | # |
| 5055 | # This is a semantic change for register tiles, which were previously |
| 5056 | # passed by value. The difference is observable for in-place tile |
| 5057 | # operations (e.g., a += b where both are tiles), which mutate the |
| 5058 | # parameter directly. Simple rebinding (a = expr) creates a new C++ |
| 5059 | # variable: for register tiles this is a full value copy, for shared |
| 5060 | # tiles a non-owning handle to the same shared memory (element-level |
| 5061 | # writes through either variable affect the same data). |
| 5062 | # The pass-by-reference behavior for in-place ops is intentional: |
| 5063 | # it matches the Python semantics where augmented assignment on a |
| 5064 | # mutable object modifies it in place. |
| 5065 | template_params = [] |
| 5066 | |
| 5067 | # forward args |
| 5068 | for i, arg in enumerate(adj.args): |
| 5069 | if is_tile(arg.type) or is_tile_stack(arg.type): |
| 5070 | tname = f"tile_{arg.label}" |
| 5071 | template_params.append(tname) |
| 5072 | s = f"{tname}& {arg.emit()}" |
| 5073 | else: |
| 5074 | s = f"{arg.ctype()} {arg.emit()}" |
| 5075 | forward_args.append(s) |
nothing calls this directly
no test coverage detected