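Log. When (batch_idx + 1) is a multiple of self.log_interval, assembles a status line with GPU memory usage and the epoch/step counters. Args: epoch: current epoch, reported as epoch/self.max_epoch. batch_idx: batch index used for the log-interval check; reported as batch_idx + 1 in step_in_slice unless log_step is given, in which case log_step replaces it. step_in_epoch: reported as-is in the status line. batch_num_epoch: reported as the total in step_in_slice (default -1). lr: TODO (use not visible in this excerpt; presumably the current learning rate, to be confirmed). loss: TODO (use not visible in this excerpt). speed_stats: TODO (use not visible in this excerpt). stats: TODO (use not visible in this excerpt).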
(
self,
epoch=0,
batch_idx=0,
step_in_epoch=0,
batch_num_epoch=-1,
lr=0.0,
loss=0.0,
speed_stats=None,
stats=None,
writer=None,
tag="train",
data_split_i=0,
data_split_num=1,
log_step=None,
**kwargs,
)
| 654 | iterator_stop = torch.tensor(0).to(self.device) |
| 655 | |
| 656 | def log( |
| 657 | self, |
| 658 | epoch=0, |
| 659 | batch_idx=0, |
| 660 | step_in_epoch=0, |
| 661 | batch_num_epoch=-1, |
| 662 | lr=0.0, |
| 663 | loss=0.0, |
| 664 | speed_stats=None, |
| 665 | stats=None, |
| 666 | writer=None, |
| 667 | tag="train", |
| 668 | data_split_i=0, |
| 669 | data_split_num=1, |
| 670 | log_step=None, |
| 671 | **kwargs, |
| 672 | ): |
| 673 | |
| 674 | """Log. |
| 675 | |
| 676 | Args: |
| 677 | epoch: TODO. |
| 678 | batch_idx: TODO. |
| 679 | step_in_epoch: TODO. |
| 680 | batch_num_epoch: TODO. |
| 681 | lr: TODO. |
| 682 | loss: TODO. |
| 683 | speed_stats: TODO. |
| 684 | stats: TODO. |
| 685 | writer: TODO. |
| 686 | tag: TODO. |
| 687 | data_split_i: TODO. |
| 688 | data_split_num: TODO. |
| 689 | log_step: TODO. |
| 690 | **kwargs: Additional keyword arguments. |
| 691 | """ |
| 692 | if (batch_idx + 1) % self.log_interval == 0: |
| 693 | batch_idx = log_step if log_step is not None else batch_idx |
| 694 | gpu_info = ( |
| 695 | "GPU, memory: usage: {:.3f} GB, " |
| 696 | "peak: {:.3f} GB, " |
| 697 | "cache: {:.3f} GB, " |
| 698 | "cache_peak: {:.3f} GB".format( |
| 699 | torch.cuda.memory_allocated() / 1024 / 1024 / 1024, |
| 700 | torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024, |
| 701 | torch.cuda.memory_reserved() / 1024 / 1024 / 1024, |
| 702 | torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, |
| 703 | ) |
| 704 | ) |
| 705 | |
| 706 | loss_avg_epoch = getattr(self, f"{tag}_loss_avg") |
| 707 | acc_avg_epoch = getattr(self, f"{tag}_acc_avg") |
| 708 | description = ( |
| 709 | f"{tag}, " |
| 710 | f"rank: {self.rank}, " |
| 711 | f"epoch: {epoch}/{self.max_epoch}, " |
| 712 | f"data_slice: {data_split_i}/{data_split_num}, " |
| 713 | f"step_in_slice: {batch_idx + 1}/{batch_num_epoch}, step_in_epoch: {step_in_epoch}, total step: {self.batch_total}, " |
no outgoing calls
no test coverage detected