()
| 365 | |
| 366 | |
| 367 | def main(): |
| 368 | parser = argparse.ArgumentParser() |
| 369 | parser.add_argument( |
| 370 | "--sam2_cfg", |
| 371 | type=str, |
| 372 | default="configs/sam2.1/sam2.1_hiera_b+.yaml", |
| 373 | help="SAM 2 model configuration file", |
| 374 | ) |
| 375 | parser.add_argument( |
| 376 | "--sam2_checkpoint", |
| 377 | type=str, |
| 378 | default="./checkpoints/sam2.1_hiera_b+.pt", |
| 379 | help="path to the SAM 2 model checkpoint", |
| 380 | ) |
| 381 | parser.add_argument( |
| 382 | "--base_video_dir", |
| 383 | type=str, |
| 384 | required=True, |
| 385 | help="directory containing videos (as JPEG files) to run VOS prediction on", |
| 386 | ) |
| 387 | parser.add_argument( |
| 388 | "--input_mask_dir", |
| 389 | type=str, |
| 390 | required=True, |
| 391 | help="directory containing input masks (as PNG files) of each video", |
| 392 | ) |
| 393 | parser.add_argument( |
| 394 | "--video_list_file", |
| 395 | type=str, |
| 396 | default=None, |
| 397 | help="text file containing the list of video names to run VOS prediction on", |
| 398 | ) |
| 399 | parser.add_argument( |
| 400 | "--output_mask_dir", |
| 401 | type=str, |
| 402 | required=True, |
| 403 | help="directory to save the output masks (as PNG files)", |
| 404 | ) |
| 405 | parser.add_argument( |
| 406 | "--score_thresh", |
| 407 | type=float, |
| 408 | default=0.0, |
| 409 | help="threshold for the output mask logits (default: 0.0)", |
| 410 | ) |
| 411 | parser.add_argument( |
| 412 | "--use_all_masks", |
| 413 | action="store_true", |
| 414 | help="whether to use all available PNG files in input_mask_dir " |
| 415 | "(default without this flag: just the first PNG file as input to the SAM 2 model; " |
| 416 | "usually we don't need this flag, since semi-supervised VOS evaluation usually takes input from the first frame only)", |
| 417 | ) |
| 418 | parser.add_argument( |
| 419 | "--per_obj_png_file", |
| 420 | action="store_true", |
| 421 | help="whether use separate per-object PNG files for input and output masks " |
| 422 | "(default without this flag: all object masks are packed into a single PNG file on each frame following DAVIS format; " |
| 423 | "note that the SA-V dataset stores each object mask as an individual PNG file and requires this flag)", |
| 424 | ) |
no test coverage detected