MCPcopy
hub / github.com/zai-org/CogVideo / forward

Method forward

inference/gradio_composite_demo/rife/IFNet_m.py:70–127  ·  view source on GitHub ↗
(self, x, scale=[4, 2, 1], timestep=0.5, returnflow=False)

Source from the content-addressed store, hash-verified

68 self.unet = Unet()
69
def forward(self, x, scale=(4, 2, 1), timestep=0.5, returnflow=False):
    """Estimate flow between two frames and synthesize the intermediate frame.

    The input is split along dim 1: channels ``0:3`` are the first frame,
    ``3:6`` the second frame, and anything from channel 6 onward is treated
    as the ground-truth frame. Three student blocks refine a flow/mask
    estimate in sequence. When exactly three ground-truth channels are
    present, a teacher block (which also sees the ground truth) produces a
    further refinement that is used to compute a distillation loss.

    Args:
        x: Input tensor, indexed as ``x[:, c]`` with channels on dim 1
            (presumably ``(N, C, H, W)`` -- confirm against the caller).
        scale: One scale value per student block, forwarded as the
            ``scale`` keyword of ``block0``/``block1``/``block2``.
        timestep: Scalar multiplied into a one-channel map shaped like
            ``x[:, :1]`` and fed to every block.
        returnflow: If true, return only the final flow and skip the
            context/U-Net refinement.

    Returns:
        If ``returnflow`` is true, the flow after the last student block.
        Otherwise the tuple ``(flow_list, mask_list[2], merged,
        flow_teacher, merged_teacher, loss_distill)`` where:

        * ``flow_list`` holds the flow after each student block;
        * ``mask_list[2]`` is the sigmoid of the final mask;
        * ``merged`` holds the mask-blended frame per student block, the
          last one additionally refined by the U-Net residual and clamped
          to ``[0, 1]``;
        * ``flow_teacher`` / ``merged_teacher`` are the teacher outputs,
          or ``None`` when no 3-channel ground truth was supplied;
        * ``loss_distill`` is ``0`` without ground truth, otherwise the
          accumulated distillation loss.
    """
    # Expand the scalar timestep into a one-channel map matching x[:, :1].
    timestep = (x[:, :1].clone() * 0 + 1) * timestep
    img0 = x[:, :3]
    img1 = x[:, 3:6]
    # Slicing never yields None: with a 6-channel input this is a
    # zero-channel tensor, which is what disables the teacher path below.
    gt = x[:, 6:]
    has_gt = gt.shape[1] == 3

    flow_list = []
    merged = []
    mask_list = []
    warped_img0 = img0
    warped_img1 = img1
    flow = None
    loss_distill = 0

    stu = [self.block0, self.block1, self.block2]
    for i, block in enumerate(stu):
        if flow is not None:
            # Later blocks predict a residual on top of the running estimate.
            flow_d, mask_d = block(
                torch.cat((img0, img1, timestep, warped_img0, warped_img1, mask), 1),
                flow,
                scale=scale[i],
            )
            flow = flow + flow_d
            mask = mask + mask_d
        else:
            # The first block has no prior flow, warped frames, or mask.
            flow, mask = block(torch.cat((img0, img1, timestep), 1), None, scale=scale[i])
        mask_list.append(torch.sigmoid(mask))
        flow_list.append(flow)
        # Flow channels 0:2 are applied to img0, channels 2:4 to img1.
        warped_img0 = warp(img0, flow[:, :2])
        warped_img1 = warp(img1, flow[:, 2:4])
        merged.append((warped_img0, warped_img1))

    if has_gt:
        # The teacher block additionally receives the ground-truth frame.
        flow_d, mask_d = self.block_tea(
            torch.cat((img0, img1, timestep, warped_img0, warped_img1, mask, gt), 1),
            flow,
            scale=1,
        )
        flow_teacher = flow + flow_d
        warped_img0_teacher = warp(img0, flow_teacher[:, :2])
        warped_img1_teacher = warp(img1, flow_teacher[:, 2:4])
        mask_teacher = torch.sigmoid(mask + mask_d)
        merged_teacher = warped_img0_teacher * mask_teacher + warped_img1_teacher * (1 - mask_teacher)
    else:
        flow_teacher = None
        merged_teacher = None

    # Blend each pair of warped frames with the corresponding sigmoid mask.
    merged = [
        warped0 * blend + warped1 * (1 - blend)
        for (warped0, warped1), blend in zip(merged, mask_list)
    ]

    if has_gt:
        # Loop-invariant: per-pixel reconstruction error of the teacher.
        teacher_err = (merged_teacher - gt).abs().mean(1, True)
        for student_merged, student_flow in zip(merged, flow_list):
            # Distill only where the student is worse than the teacher by
            # more than the 0.01 margin; the mask carries no gradient.
            loss_mask = (
                ((student_merged - gt).abs().mean(1, True) > teacher_err + 0.01)
                .float()
                .detach()
            )
            loss_distill += (
                ((flow_teacher.detach() - student_flow) ** 2).mean(1, True) ** 0.5 * loss_mask
            ).mean()

    if returnflow:
        return flow

    c0 = self.contextnet(img0, flow[:, :2])
    c1 = self.contextnet(img1, flow[:, 2:4])
    # Note: the U-Net receives the raw (pre-sigmoid) mask.
    tmp = self.unet(img0, img1, warped_img0, warped_img1, mask, flow, c0, c1)
    # Map the first three U-Net channels to a residual via x * 2 - 1.
    res = tmp[:, :3] * 2 - 1
    merged[2] = torch.clamp(merged[2] + res, 0, 1)
    return flow_list, mask_list[2], merged, flow_teacher, merged_teacher, loss_distill

Callers

nothing calls this directly

Calls 1

warpFunction · 0.85

Tested by

no test coverage detected