(self, num_layers: int)
| 313 | return self.moe_ep_size > 1 |
| 314 | |
def pp_layers(self, num_layers: int) -> List[int]:
    """Return the layer indices handled by this pipeline-parallel rank.

    When `self.pp_partition` is set, it is used as the explicit number of
    layers per pipeline rank; otherwise the `num_layers` indices are split
    across `self.pp_size` ranks as evenly as possible.

    Args:
        num_layers: Total number of layers to distribute.

    Returns:
        The list of layer indices selected by `self.pp_rank`.

    Raises:
        ValueError: If `self.pp_partition` is set and its length differs
            from `self.pp_size`, or its entries do not sum to `num_layers`.
    """
    layer_counts = self.pp_partition

    if layer_counts is None:
        # Even split: if num_layers % pp_size = n != 0, the first n ranks
        # get one extra layer.
        all_layers = torch.arange(num_layers)
        per_rank = torch.tensor_split(all_layers, self.pp_size)
        return per_rank[self.pp_rank].tolist()

    # Explicit partition: validate it before building any tensors.
    # The f-strings use the `=` specifier, so the expression text inside
    # the braces is part of the emitted message.
    if len(layer_counts) != self.pp_size:
        raise ValueError(
            f"{len(self.pp_partition)=} does not match {self.pp_size=}.")
    if sum(layer_counts) != num_layers:
        raise ValueError(
            f"{sum(self.pp_partition)=} does not match {num_layers=}.")

    per_rank = torch.arange(num_layers).split(layer_counts)
    return per_rank[self.pp_rank].tolist()
| 330 | |
| 331 | def pp_rank_of_layer(self, layer_idx: int, num_layers: int) -> int: |
| 332 | """Return pipeline-parallel rank that owns `layer_idx` for a model with `num_layers` layers. |
no test coverage detected