r""" Generates video frames from input image using diffusion process. Args: src_root_path ('str'): Process output path replace_flag (`bool`, *optional*, defaults to False): Whether to use character replace. clip_len
(
self,
src_root_path,
replace_flag=False,
clip_len=77,
refert_num=1,
shift=5.0,
sample_solver='dpm++',
sampling_steps=20,
guide_scale=1,
input_prompt="",
n_prompt="",
seed=-1,
offload_model=True,
)
| 295 | return bg_images, mask_images |
| 296 | |
| 297 | def generate( |
| 298 | self, |
| 299 | src_root_path, |
| 300 | replace_flag=False, |
| 301 | clip_len=77, |
| 302 | refert_num=1, |
| 303 | shift=5.0, |
| 304 | sample_solver='dpm++', |
| 305 | sampling_steps=20, |
| 306 | guide_scale=1, |
| 307 | input_prompt="", |
| 308 | n_prompt="", |
| 309 | seed=-1, |
| 310 | offload_model=True, |
| 311 | ): |
| 312 | r""" |
| 313 | Generates video frames from input image using diffusion process. |
| 314 | |
| 315 | Args: |
| 316 | src_root_path (`str`): |
| 317 | Process output path |
| 318 | replace_flag (`bool`, *optional*, defaults to False): |
| 319 | Whether to use character replace. |
| 320 | clip_len (`int`, *optional*, defaults to 77): |
| 321 | How many frames to generate per clip. The number should be 4n+1. |
| 322 | refert_num (`int`, *optional*, defaults to 1): |
| 323 | How many frames used for temporal guidance. Recommended to be 1 or 5. |
| 324 | shift (`float`, *optional*, defaults to 5.0): |
| 325 | Noise schedule shift parameter. |
| 326 | sample_solver (`str`, *optional*, defaults to 'dpm++'): |
| 327 | Solver used to sample the video. |
| 328 | sampling_steps (`int`, *optional*, defaults to 20): |
| 329 | Number of diffusion sampling steps. Higher values improve quality but slow generation |
| 330 | guide_scale (`float` or tuple[`float`], *optional*, defaults to 1.0): |
| 331 | Classifier-free guidance scale. We only use it for expression control. |
| 332 | In most cases, it's not necessary and faster generation can be achieved without it. |
| 333 | When expression adjustments are needed, you may consider using this feature. |
| 334 | input_prompt (`str`): |
| 335 | Text prompt for content generation. We don't recommend custom prompts (although they work) |
| 336 | n_prompt (`str`, *optional*, defaults to ""): |
| 337 | Negative prompt for content exclusion. If not given, use `config.sample_neg_prompt` |
| 338 | seed (`int`, *optional*, defaults to -1): |
| 339 | Random seed for noise generation. If -1, use random seed |
| 340 | offload_model (`bool`, *optional*, defaults to True): |
| 341 | If True, offloads models to CPU during generation to save VRAM |
| 342 | |
| 343 | Returns: |
| 344 | torch.Tensor: |
| 345 | Generated video frames tensor. Dimensions: (C, N, H, W) where: |
| 346 | - C: Color channels (3 for RGB) |
| 347 | - N: Number of frames |
| 348 | - H: Frame height |
| 349 | - W: Frame width |
| 350 | """ |
| 351 | assert refert_num == 1 or refert_num == 5, "refert_num should be 1 or 5." |
| 352 | |
| 353 | seed_g = torch.Generator(device=self.device) |
| 354 | seed_g.manual_seed(seed) |
no test coverage detected