Perform uniform spatial sampling on the images and corresponding boxes. Args: images (tensor): images to perform uniform crop. The dimension is `num frames` x `channel` x `height` x `width`. size (int): size of height and width to crop the images. spatia
(images, size, spatial_idx, boxes=None, scale_size=None)
| 185 | |
| 186 | |
def uniform_crop(images, size, spatial_idx, boxes=None, scale_size=None):
    """
    Perform uniform spatial sampling on the images and corresponding boxes.

    Args:
        images (tensor): images to perform uniform crop. The dimension is
            `num frames` x `channel` x `height` x `width`. A single 3-D
            image (`channel` x `height` x `width`) is also accepted; it is
            given a temporary leading dimension that is removed again
            before returning.
        size (int): size of height and width to crop the images.
        spatial_idx (int): 0, 1, or 2 for left, center, and right crop if width
            is larger than (or equal to) height. Or 0, 1, or 2 for top,
            center, and bottom crop if height is larger than width.
        boxes (ndarray or None): optional. Corresponding boxes to images.
            Dimension is `num boxes` x 4.
        scale_size (int): optional. If not None, resize the images so that
            their shorter side equals scale_size (the longer side is scaled
            by the same ratio and truncated to an int) before performing
            any crop.
    Returns:
        cropped (tensor): images with dimension of
            `num frames` x `channel` x `size` x `size`. If `size` is larger
            than the (possibly rescaled) height or width, the crop covers
            the full extent of that dimension instead.
        cropped_boxes (ndarray or None): the cropped boxes with dimension of
            `num boxes` x 4.
    """
    assert spatial_idx in (0, 1, 2), "spatial_idx must be 0, 1, or 2"

    # Work on a 4-D batch internally; remember whether to undo this later.
    is_single_image = len(images.shape) == 3
    if is_single_image:
        images = images.unsqueeze(0)
    height = images.shape[2]
    width = images.shape[3]

    if scale_size is not None:
        # Scale the shorter side to scale_size while keeping the aspect ratio.
        if width <= height:
            width, height = scale_size, int(height / width * scale_size)
        else:
            width, height = int(width / height * scale_size), scale_size
        images = torch.nn.functional.interpolate(
            images,
            size=(height, width),
            mode="bilinear",
            align_corners=False,
        )

    # Default to a centered crop on both axes. Offsets are clamped at 0:
    # a negative offset would be interpreted by slicing as "count from the
    # end" and silently yield an arbitrary fragment of the image.
    y_offset = max(0, int(math.ceil((height - size) / 2)))
    x_offset = max(0, int(math.ceil((width - size) / 2)))

    # spatial_idx moves the crop window along the longer axis only.
    if height > width:
        if spatial_idx == 0:
            y_offset = 0
        elif spatial_idx == 2:
            y_offset = max(0, height - size)
    else:
        if spatial_idx == 0:
            x_offset = 0
        elif spatial_idx == 2:
            x_offset = max(0, width - size)

    cropped = images[:, :, y_offset : y_offset + size, x_offset : x_offset + size]
    # NOTE(review): boxes are handed to crop_boxes with the crop offsets only;
    # they are not rescaled here when scale_size is given -- confirm callers
    # pass boxes that already match the resized image coordinates.
    cropped_boxes = crop_boxes(boxes, x_offset, y_offset) if boxes is not None else None
    if is_single_image:
        cropped = cropped.squeeze(0)
    return cropped, cropped_boxes
| 244 |