r"""Execute forward propagation Arguments: *inputs: Variable length input list **kwargs: variable length keyword arguments
(self, *inputs, **kwargs)
| 2483 | |
@instrument_w_nvtx
def forward(self, *inputs, **kwargs):
    r"""Execute forward propagation.

    Arguments:
        *inputs: Variable length input list, forwarded unchanged to ``self.module``.
        **kwargs: Variable length keyword arguments, forwarded unchanged to ``self.module``.

    Returns:
        The value returned by ``self.module(*inputs, **kwargs)``, after backward
        hooks have been registered on it.

    Raises:
        SystemExit: When ``self.autotuning_profile_model_info()`` is truthy. In that
            mode the measured activation memory is stored in
            ``self.autotuning_model_info``, written out with ``print_json_dist``,
            and the process exits instead of returning.
    """
    # Clear the backward seen flag at the start of each forward pass.
    # This is used to track multiple gradient hook phases with reentrant checkpointing.
    if isinstance(self.optimizer, ZeROOptimizer):
        self.optimizer.clear_backward_seen_flag()

    if self.autotuning_profile_model_info():
        # Baseline reading taken before the module runs; it is subtracted from a
        # second reading after the forward pass below.
        ma = get_ma_status()

    if self.is_deepcompile_enabled() and not self.is_deepcompile_active() and not self.is_compiled:
        log_dist_once(
            "DeepCompile is enabled but engine.compile() has not been called; executing without DeepCompile until compile() runs.",
            ranks=[0])

    if self.is_deepcompile_active() and hasattr(self, "launch_compile_passes"):
        # We can't have this in forward prologue as the compiler compiles hooks including the forward prologue.
        self.launch_compile_passes(self.global_steps)

    with autocast_if_enabled(self):
        loss = self.module(*inputs, **kwargs)

    # Register output backward hooks
    # preprocess_once_fn is called for preprocessing
    # preprocess_per_tensor_fn scales a tensor for gradient accumulation
    register_output_backward_hooks(loss,
                                   preprocess_once_fn=self._backward_prologue,
                                   preprocess_per_tensor_fn=self._backward_prologue_per_tensor)

    if self.autotuning_profile_model_info():
        # Difference between the post-forward reading and the baseline is recorded
        # as the per-GPU activation memory.
        activation_mem = get_ma_status() - ma
        self.autotuning_model_info["activation_mem_per_gpu"] = activation_mem
        print_json_dist(self.autotuning_model_info, [0], path=self.autotuning_model_info_path())
        # Profiling run is complete: terminate the process. Use sys.exit() rather
        # than the bare exit() builtin, which is installed by the `site` module for
        # interactive sessions and is not guaranteed to exist (e.g. `python -S`).
        import sys
        sys.exit()

    return loss
| 2525 | |
| 2526 | def _cast_inputs_half(self, inputs): |
| 2527 | if isinstance(inputs, (list, tuple)): |
nothing calls this directly
no test coverage detected