r""" Preprocesses input video(s). Keyword arguments will be forwarded to `VaeImageProcessor.preprocess`. Args: video (`list[PIL.Image]`, `list[list[PIL.Image]]`, `torch.Tensor`, `np.array`, `list[torch.Tensor]`, `list[np.array]`): The input video. It can
(self, video, height: int | None = None, width: int | None = None, **kwargs)
| 26 | r"""Simple video processor.""" |
| 27 | |
| 28 | def preprocess_video(self, video, height: int | None = None, width: int | None = None, **kwargs) -> torch.Tensor: |
| 29 | r""" |
| 30 | Preprocesses input video(s). Keyword arguments will be forwarded to `VaeImageProcessor.preprocess`. |
| 31 | |
| 32 | Args: |
| 33 | video (`list[PIL.Image]`, `list[list[PIL.Image]]`, `torch.Tensor`, `np.array`, `list[torch.Tensor]`, `list[np.array]`): |
| 34 | The input video. It can be one of the following: |
| 35 | * list of the PIL images. |
| 36 | * list of list of PIL images. |
| 37 | * 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height, width)`). |
| 38 | * 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`). |
| 39 | * list of 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height, |
| 40 | width)`). |
| 41 | * list of 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`). |
| 42 | * 5D NumPy arrays: expected shape for each array `(batch_size, num_frames, height, width, |
| 43 | num_channels)`. |
| 44 | * 5D Torch tensors: expected shape for each tensor `(batch_size, num_frames, num_channels, height, |
| 45 | width)`. |
| 46 | height (`int`, *optional*, defaults to `None`): |
| 47 | The height in preprocessed frames of the video. If `None`, will use `get_default_height_width()` to |
| 48 | get the default height. |
| 49 | width (`int`, *optional*, defaults to `None`): |
| 50 | The width in preprocessed frames of the video. If `None`, will use `get_default_height_width()` to get |
| 51 | the default width. |
| 52 | |
| 53 | Returns: |
| 54 | `torch.Tensor` of shape `(batch_size, num_channels, num_frames, height, width)`: |
| 55 | A 5D tensor holding the batched channels-first video(s). |
| 56 | """ |
| 57 | if isinstance(video, list) and isinstance(video[0], np.ndarray) and video[0].ndim == 5: |
| 58 | warnings.warn( |
| 59 | "Passing `video` as a list of 5d np.ndarray is deprecated." |
| 60 | "Please concatenate the list along the batch dimension and pass it as a single 5d np.ndarray", |
| 61 | FutureWarning, |
| 62 | ) |
| 63 | video = np.concatenate(video, axis=0) |
| 64 | if isinstance(video, list) and isinstance(video[0], torch.Tensor) and video[0].ndim == 5: |
| 65 | warnings.warn( |
| 66 | "Passing `video` as a list of 5d torch.Tensor is deprecated." |
| 67 | "Please concatenate the list along the batch dimension and pass it as a single 5d torch.Tensor", |
| 68 | FutureWarning, |
| 69 | ) |
| 70 | video = torch.cat(video, axis=0) |
| 71 | |
| 72 | # ensure the input is a list of videos: |
| 73 | # - if it is a batch of videos (5d torch.Tensor or np.ndarray), it is converted to a list of videos (a list of 4d torch.Tensor or np.ndarray) |
| 74 | # - if it is a single video, it is converted to a list of one video. |
| 75 | if isinstance(video, (np.ndarray, torch.Tensor)) and video.ndim == 5: |
| 76 | video = list(video) |
| 77 | elif isinstance(video, list) and is_valid_image(video[0]) or is_valid_image_imagelist(video): |
| 78 | video = [video] |
| 79 | elif isinstance(video, list) and is_valid_image_imagelist(video[0]): |
| 80 | video = video |
| 81 | else: |
| 82 | raise ValueError( |
| 83 | "Input is in incorrect format. Currently, we only support numpy.ndarray, torch.Tensor, PIL.Image.Image" |
| 84 | ) |
| 85 |