sparse attention arguments.
(parser)
| 287 | return parser |
| 288 | |
def add_sparse_args(parser):
    """Register the sparse attention arguments on ``parser``.

    Creates an argument group titled 'Sparse Attention' and adds four
    integer options to it: ``--is-sparse`` (restricted to 0, 1 or 2,
    default 0), ``--query-window`` (default 128),
    ``--key-window-times`` (default 6) and ``--num-pivot`` (default 768).

    Args:
        parser: An argparse-style parser exposing ``add_argument_group``.

    Returns:
        The same ``parser`` object that was passed in.
    """

    group = parser.add_argument_group('Sparse Attention', 'sparse configurations')
    # TODO: Temporarily not using is-sparse==2 (not optimized), use 0 for inference.
    group.add_argument('--is-sparse', type=int, default=0,
                       choices=[0, 1, 2],
                       help='whether use sparse attention. 0 not 1 train 2 inference')
    # NOTE(review): the options below have no help text; their exact meaning
    # is not visible from this function -- confirm against the attention code
    # before documenting them for users.
    group.add_argument("--query-window", type=int, default=128)
    group.add_argument("--key-window-times", type=int, default=6)
    group.add_argument("--num-pivot", type=int, default=768)
    return parser
| 300 | |
| 301 | def get_args(): |
| 302 | """Parse all the args.""" |