r""" Generates video frames from text prompt using diffusion process. Args: input_prompt (`str`): Text prompt for content generation size (tupele[`int`], *optional*, defaults to (1280,720)): Controls video resolution, (width,he
(self,
input_prompt,
size=(1280, 720),
frame_num=81,
shift=5.0,
sample_solver='unipc',
sampling_steps=50,
guide_scale=5.0,
n_prompt="",
seed=-1,
offload_model=True)
| 112 | self.sample_neg_prompt = config.sample_neg_prompt |
| 113 | |
| 114 | def generate(self, |
| 115 | input_prompt, |
| 116 | size=(1280, 720), |
| 117 | frame_num=81, |
| 118 | shift=5.0, |
| 119 | sample_solver='unipc', |
| 120 | sampling_steps=50, |
| 121 | guide_scale=5.0, |
| 122 | n_prompt="", |
| 123 | seed=-1, |
| 124 | offload_model=True): |
| 125 | r""" |
| 126 | Generates video frames from text prompt using diffusion process. |
| 127 | |
| 128 | Args: |
| 129 | input_prompt (`str`): |
| 130 | Text prompt for content generation |
| 131 | size (tuple[`int`], *optional*, defaults to (1280,720)): |
| 132 | Controls video resolution, (width,height). |
| 133 | frame_num (`int`, *optional*, defaults to 81): |
| 134 | How many frames to sample from a video. The number should be 4n+1 |
| 135 | shift (`float`, *optional*, defaults to 5.0): |
| 136 | Noise schedule shift parameter. Affects temporal dynamics |
| 137 | sample_solver (`str`, *optional*, defaults to 'unipc'): |
| 138 | Solver used to sample the video. |
| 139 | sampling_steps (`int`, *optional*, defaults to 50): |
| 140 | Number of diffusion sampling steps. Higher values improve quality but slow generation |
| 141 | guide_scale (`float`, *optional*, defaults to 5.0): |
| 142 | Classifier-free guidance scale. Controls prompt adherence vs. creativity |
| 143 | n_prompt (`str`, *optional*, defaults to ""): |
| 144 | Negative prompt for content exclusion. If not given, use `config.sample_neg_prompt` |
| 145 | seed (`int`, *optional*, defaults to -1): |
| 146 | Random seed for noise generation. If -1, use random seed. |
| 147 | offload_model (`bool`, *optional*, defaults to True): |
| 148 | If True, offloads models to CPU during generation to save VRAM |
| 149 | |
| 150 | Returns: |
| 151 | torch.Tensor: |
| 152 | Generated video frames tensor. Dimensions: (C, N, H, W) where: |
| 153 | - C: Color channels (3 for RGB) |
| 154 | - N: Number of frames (81) |
| 155 | - H: Frame height (from size) |
| 156 | - W: Frame width (from size) |
| 157 | """ |
| 158 | # preprocess |
| 159 | F = frame_num |
| 160 | target_shape = (self.vae.model.z_dim, (F - 1) // self.vae_stride[0] + 1, |
| 161 | size[1] // self.vae_stride[1], |
| 162 | size[0] // self.vae_stride[2]) |
| 163 | |
| 164 | seq_len = math.ceil((target_shape[2] * target_shape[3]) / |
| 165 | (self.patch_size[1] * self.patch_size[2]) * |
| 166 | target_shape[1] / self.sp_size) * self.sp_size |
| 167 | |
| 168 | if n_prompt == "": |
| 169 | n_prompt = self.sample_neg_prompt |
| 170 | seed = seed if seed >= 0 else random.randint(0, sys.maxsize) |
| 171 | seed_g = torch.Generator(device=self.device) |
nothing calls this directly
no test coverage detected