(kernel, device, options)
| 5262 | |
| 5263 | |
| 5264 | def codegen_kernel(kernel, device, options): |
| 5265 | # Update the module's options with the ones defined on the kernel, if any. |
| 5266 | options = options | kernel.options |
| 5267 | |
| 5268 | adj = kernel.adj |
| 5269 | |
| 5270 | args_struct = "" |
| 5271 | if device == "cpu": |
| 5272 | args_struct = f"struct wp_args_{kernel.get_mangled_name()} {{\n" |
| 5273 | for i in adj.args: |
| 5274 | args_struct += f" {i.ctype()} {i.label};\n" |
| 5275 | args_struct += "};\n" |
| 5276 | |
| 5277 | # Build line directive for function definition (subtract 1 to account for 1-indexing of AST line numbers) |
| 5278 | # This is used as a catch-all C-to-Python source line mapping for any code that does not have |
| 5279 | # a direct mapping to a Python source line. |
| 5280 | func_line_directive = "" |
| 5281 | if line_directive := adj.get_line_directive("", adj.fun_def_lineno - 1): |
| 5282 | func_line_directive = f"{line_directive}\n" |
| 5283 | |
| 5284 | if device == "cpu": |
| 5285 | template_forward = cpu_kernel_template_forward |
| 5286 | template_backward = cpu_kernel_template_backward |
| 5287 | elif device == "cuda": |
| 5288 | template_forward = cuda_kernel_template_forward |
| 5289 | template_backward = cuda_kernel_template_backward |
| 5290 | else: |
| 5291 | raise ValueError(f"Device {device} is not supported") |
| 5292 | |
| 5293 | template = "" |
| 5294 | template_fmt_args = { |
| 5295 | "name": kernel.get_mangled_name(), |
| 5296 | "launch_ndim": kernel.adj.kernel_dim, |
| 5297 | } |
| 5298 | |
| 5299 | # Generate launch_bounds string for CUDA kernels |
| 5300 | launch_bounds_str = "" |
| 5301 | if device == "cuda" and "launch_bounds" in options: |
| 5302 | launch_bounds = options["launch_bounds"] |
| 5303 | if isinstance(launch_bounds, int): |
| 5304 | launch_bounds_str = f"__launch_bounds__({launch_bounds}) " |
| 5305 | elif isinstance(launch_bounds, (tuple, list)): |
| 5306 | if len(launch_bounds) == 1: |
| 5307 | launch_bounds_str = f"__launch_bounds__({launch_bounds[0]}) " |
| 5308 | elif len(launch_bounds) == 2: |
| 5309 | launch_bounds_str = f"__launch_bounds__({launch_bounds[0]}, {launch_bounds[1]}) " |
| 5310 | else: |
| 5311 | raise ValueError(f"launch_bounds must be an int or a tuple/list of 1-2 ints, got {launch_bounds}") |
| 5312 | else: |
| 5313 | raise ValueError(f"launch_bounds must be an int or a tuple/list of 1-2 ints, got {type(launch_bounds)}") |
| 5314 | |
| 5315 | # build forward signature |
| 5316 | forward_args = [f"wp::launch_bounds_t<{adj.kernel_dim}> dim"] |
| 5317 | if device == "cpu": |
| 5318 | forward_args.append("size_t task_index") |
| 5319 | else: |
| 5320 | for arg in adj.args: |
| 5321 | forward_args.append(arg.ctype() + " var_" + arg.label) |
nothing calls this directly
no test coverage detected