r"""This function is adapted from colossalai's ``sync_model_param``. We modified it so that it only syncs, within each tensor parallel group, the parameters that are not split by tensor parallelism. This function is used to make sure parameters that are not split by tensor parallelism are the same across each tensor parallel group. For example, parameters like RMSNorm, LayerNorm..."""
(model)
| 25 | |
| 26 | |
def sync_model_param_within_tp(model):
    r"""Synchronize the parameters that are not split by tensor parallelism.

    This function is adapted from colossalai's ``sync_model_param``. It was
    modified so that it only syncs parameters within the tensor parallel group,
    and only those parameters that are not split by tensor parallelism.
    It is used to make sure such parameters are the same on every rank of a
    tensor parallel group. For example, parameters like RMSNorm, LayerNorm...

    Nothing is done when the tensor parallel mode is not initialized or its
    world size is 1.

    Args:
        model (:class:`torch.nn.Module`): A PyTorch model whose parameters are
            broadcast from the first rank of the tensor parallel group.
    """
    parallel_mode = ParallelMode.TENSOR
    # Guard clause: without an initialized tensor parallel group of more than
    # one rank there is nothing to synchronize.
    if not gpc.is_initialized(parallel_mode) or gpc.get_world_size(parallel_mode) <= 1:
        return

    # The source rank and the process group do not depend on the parameter,
    # so look them up once instead of once per parameter.
    src_rank = gpc.get_ranks_in_group(parallel_mode)[0]
    group = gpc.get_group(parallel_mode)

    for param in model.parameters():
        # Parameters flagged as model-parallel are skipped; only the remaining
        # ones are overwritten with the values held by ``src_rank``.
        if not is_model_parallel_parameter(param):
            dist.broadcast(param, src=src_rank, group=group)
| 45 | |
| 46 | |
| 47 | def is_no_pp_or_last_stage(): |
no test coverage detected