(data_loader, prefix="train")
| 102 | logger.log("training classifier model...") |
| 103 | |
def forward_backward_log(data_loader, prefix="train"):
    """Run one batch through the classifier, log metrics, and backpropagate.

    Pulls a single ``(batch, extra)`` pair from ``data_loader``, moves the
    images and the ``extra["y"]`` labels to ``dist_util.dev()``, and then
    processes the batch in the chunks produced by ``split_microbatches``.

    When ``args.noised`` is truthy, timesteps come from
    ``schedule_sampler.sample`` and the images are passed through
    ``diffusion.q_sample``; otherwise every timestep is zero and the images
    are used unchanged.

    Args:
        data_loader: Iterator whose ``next()`` yields ``(batch, extra)``,
            where ``extra`` is indexable by ``"y"``.
        prefix: String prepended to the logged metric names
            (``{prefix}_loss``, ``{prefix}_acc@1``, ``{prefix}_acc@5``).

    Returns:
        None. Results are reported through ``log_loss_dict`` and, when the
        loss requires grad, through ``mp_trainer.backward``.
    """
    images, extra = next(data_loader)
    targets = extra["y"].to(dist_util.dev())
    images = images.to(dist_util.dev())

    if not args.noised:
        # Clean images: every sample uses timestep 0.
        timesteps = th.zeros(
            images.shape[0], dtype=th.long, device=dist_util.dev()
        )
    else:
        # Noisy images: draw timesteps, then noise the batch accordingly.
        timesteps, _ = schedule_sampler.sample(images.shape[0], dist_util.dev())
        images = diffusion.q_sample(images, timesteps)

    microbatches = split_microbatches(
        args.microbatch, images, targets, timesteps
    )
    for micro_idx, (micro_images, micro_targets, micro_t) in enumerate(microbatches):
        logits = model(micro_images, timesteps=micro_t)
        per_sample_loss = F.cross_entropy(logits, micro_targets, reduction="none")

        # Per-sample metrics, all unreduced, keyed by the caller's prefix.
        metrics = {
            f"{prefix}_loss": per_sample_loss.detach(),
            f"{prefix}_acc@1": compute_top_k(
                logits, micro_targets, k=1, reduction="none"
            ),
            f"{prefix}_acc@5": compute_top_k(
                logits, micro_targets, k=5, reduction="none"
            ),
        }
        log_loss_dict(diffusion, micro_t, metrics)
        del metrics

        mean_loss = per_sample_loss.mean()
        if not mean_loss.requires_grad:
            # Nothing to backpropagate for this microbatch.
            continue

        # Clear gradients only once, before the first microbatch's backward.
        if micro_idx == 0:
            mp_trainer.zero_grad()
        # Scale the microbatch mean by its share of the full batch
        # (presumably so accumulated gradients match a full-batch mean).
        mp_trainer.backward(mean_loss * len(micro_images) / len(images))
| 137 | |
| 138 | for step in range(args.iterations - resume_step): |
| 139 | logger.logkv("step", step + resume_step) |
no test coverage detected