(args)
| 7 | |
| 8 | |
def main(args):
    """Encode one image with the consistency-decoder VAE, decode it, and save it.

    The image is resized to a square, mapped to latent space, reconstructed,
    resized back to its original resolution and written next to the input
    with a ``_cd`` suffix inserted before the file extension.

    Args:
        args: Object exposing ``seed`` (RNG seed), ``image_path`` (path of the
            image to reconstruct) and ``image_size`` (side length the image is
            resized to before encoding).

    Returns:
        None. The reconstruction is written to disk and its path is printed.
    """
    import os  # local import: only needed to derive the output path

    # Setup PyTorch:
    torch.manual_seed(args.seed)
    torch.set_grad_enabled(False)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # create and load model
    # NOTE(review): the model and input are float16 even when device is "cpu";
    # some PyTorch builds lack or are slow with half-precision CPU kernels --
    # confirm if CPU runs are expected.
    vae = ConsistencyDecoderVAE.from_pretrained(
        "openai/consistency-decoder", torch_dtype=torch.float16
    ).to(device)

    # load image
    img_path = args.image_path
    # Insert "_cd" before the extension only. Substring replacement on the
    # whole path left the path unchanged for unlisted extensions (".JPG",
    # ".bmp", ...), which made the save below overwrite the input, and it also
    # rewrote matching directory names.
    root, ext = os.path.splitext(img_path)
    if not ext:
        # Pillow picks the output format from the extension; without one the
        # save would fail only after the whole inference has run.
        ext = ".png"
    out_path = root + "_cd" + ext
    input_size = args.image_size
    # convert() returns an independent in-memory image, so the source file
    # handle can be released right away.
    with Image.open(img_path) as src:
        img = src.convert("RGB")

    # preprocess
    size_org = img.size  # PIL reports (width, height)
    img = img.resize((input_size, input_size))
    img = np.array(img) / 255.
    x = 2.0 * img - 1.0  # x value is between [-1, 1]
    x = torch.tensor(x)
    x = x.unsqueeze(dim=0)
    x = torch.einsum('nhwc->nchw', x)
    x_input = x.half().to(device)

    # inference
    latent_scale = 0.18215
    with torch.no_grad():
        # Map input images to latent space + normalize latents:
        latent = vae.encode(x_input).latent_dist.sample().mul_(latent_scale)
        # reconstruct (the scaling is undone before decoding):
        output = vae.decode(latent / latent_scale).sample  # output value is between [-1, 1]

    # postprocess
    # F.interpolate takes (height, width), hence the swapped PIL size.
    output = F.interpolate(
        output, size=[size_org[1], size_org[0]], mode='bilinear'
    ).permute(0, 2, 3, 1)[0]
    sample = torch.clamp(127.5 * output + 128.0, 0, 255).to("cpu", dtype=torch.uint8).numpy()

    # save
    Image.fromarray(sample).save(out_path)
    print("Reconstructed image is saved to {}".format(out_path))
| 48 | |
| 49 | |
| 50 |
no test coverage detected