MCP · copy · Index your code
hub / github.com/kijai/ComfyUI-WanVideoWrapper / process

Method process

nodes.py:1743–1819  ·  view source on GitHub ↗
(self, vae, width, height, num_frames, strength, vace_start_percent, vace_end_percent, input_frames=None, ref_images=None, input_masks=None, prev_vace_embeds=None, tiled_vae=False)

Source from the content-addressed store, hash-verified

1741 CATEGORY = "WanVideoWrapper"
1742
1743 def process(self, vae, width, height, num_frames, strength, vace_start_percent, vace_end_percent, input_frames=None, ref_images=None, input_masks=None, prev_vace_embeds=None, tiled_vae=False):
1744 width = (width // 16) * 16
1745 height = (height // 16) * 16
1746
1747 target_shape = (16, (num_frames - 1) // VAE_STRIDE[0] + 1,
1748 height // VAE_STRIDE[1],
1749 width // VAE_STRIDE[2])
1750 # vace context encode
1751 if input_frames is None:
1752 input_frames = torch.zeros((1, 3, num_frames, height, width), device=device, dtype=vae.dtype)
1753 else:
1754 input_frames = input_frames.clone()[:num_frames, :, :, :3]
1755 input_frames = common_upscale(input_frames.movedim(-1, 1), width, height, "lanczos", "disabled").movedim(1, -1)
1756 input_frames = input_frames.to(vae.dtype).to(device).unsqueeze(0).permute(0, 4, 1, 2, 3) # B, C, T, H, W
1757 input_frames = input_frames * 2 - 1
1758 if input_masks is None:
1759 input_masks = torch.ones_like(input_frames, device=device)
1760 else:
1761 log.info(f"input_masks shape: {input_masks.shape}")
1762 input_masks = input_masks[:num_frames]
1763 input_masks = common_upscale(input_masks.clone().unsqueeze(1), width, height, "nearest-exact", "disabled").squeeze(1)
1764 input_masks = input_masks.to(vae.dtype).to(device)
1765 input_masks = input_masks.unsqueeze(-1).unsqueeze(0).permute(0, 4, 1, 2, 3).repeat(1, 3, 1, 1, 1) # B, C, T, H, W
1766
1767 if ref_images is not None:
1768 ref_images = ref_images.clone()[..., :3]
1769 # Create padded image
1770 if ref_images.shape[0] > 1:
1771 ref_images = torch.cat([ref_images[i] for i in range(ref_images.shape[0])], dim=1).unsqueeze(0)
1772
1773 B, H, W, C = ref_images.shape
1774 current_aspect = W / H
1775 target_aspect = width / height
1776 if current_aspect > target_aspect:
1777 # Image is wider than target, pad height
1778 new_h = int(W / target_aspect)
1779 pad_h = (new_h - H) // 2
1780 padded = torch.ones(ref_images.shape[0], new_h, W, ref_images.shape[3], device=ref_images.device, dtype=ref_images.dtype)
1781 padded[:, pad_h:pad_h+H, :, :] = ref_images
1782 ref_images = padded
1783 elif current_aspect < target_aspect:
1784 # Image is taller than target, pad width
1785 new_w = int(H * target_aspect)
1786 pad_w = (new_w - W) // 2
1787 padded = torch.ones(ref_images.shape[0], H, new_w, ref_images.shape[3], device=ref_images.device, dtype=ref_images.dtype)
1788 padded[:, :, pad_w:pad_w+W, :] = ref_images
1789 ref_images = padded
1790 ref_images = common_upscale(ref_images.movedim(-1, 1), width, height, "lanczos", "center").movedim(1, -1)
1791
1792 ref_images = ref_images.to(vae.dtype).to(device).unsqueeze(0).permute(0, 4, 1, 2, 3).unsqueeze(0)
1793 ref_images = ref_images * 2 - 1
1794
1795 vae = vae.to(device)
1796 z0 = self.vace_encode_frames(vae, input_frames, ref_images, masks=input_masks, tiled_vae=tiled_vae)
1797
1798 m0 = self.vace_encode_masks(input_masks, ref_images)
1799 z = self.vace_latent(z0, m0)
1800 vae.to(offload_device)

Callers

nothing calls this directly

Calls 4

vace_encode_frames · Method · 0.95
vace_encode_masks · Method · 0.95
vace_latent · Method · 0.95
to · Method · 0.80

Tested by

no test coverage detected