Language Adapter module that uses the MOE plugin with static expert selection, passed in as a parameter of each request. The user selects a language MLP for each request. See https://arxiv.org/pdf/2005.00052 for more details.
| 60 | |
| 61 | |
| 62 | class LanguageAdapter(Module): |
| 63 | """ |
| 64 | Language Adapter module that uses the MOE plugin with static expert selection, passed in as a parameter of each request. |
| 65 | The user selects a language MLP for each request. |
| 66 | See https://arxiv.org/pdf/2005.00052 for more details. |
| 67 | """ |
| 68 | |
def __init__(
    self,
    language_adapter_config: LanguageAdapterConfig,
    hidden_size: int,
    hidden_act: str,
    mapping: Mapping = Mapping(),
    has_mlp_bias: bool = True,
    dtype=None,
    quant_mode=QuantMode(0),
):
    """Validate the adapter config and build the statically routed MOE layer.

    Args:
        language_adapter_config: Adapter settings. It is stored on
            ``self.config``, validated via ``validate()``, and supplies
            ``ffn_hidden_size`` plus the MOE config (``to_MOE_config()``).
        hidden_size: Forwarded to ``MOE`` as ``hidden_size``.
        hidden_act: Forwarded to ``MOE`` as ``hidden_act``.
        mapping: Supplies ``tp_group`` and ``tp_size`` for ``MOE`` and is
            also forwarded whole as ``mapping``.
        has_mlp_bias: Forwarded to ``MOE`` as ``bias``.
        dtype: Forwarded to ``MOE`` unchanged.
        quant_mode: Forwarded to ``MOE`` unchanged.
    """
    # NOTE(review): the ``Mapping()`` default is evaluated once, when the
    # function is defined, so every caller that omits ``mapping`` shares the
    # same instance -- confirm nothing downstream mutates it.
    super().__init__()

    # Keep the config on the module and validate it before any layer is
    # constructed from it.
    self.config = language_adapter_config
    self.config.validate()

    # Gather the MOE arguments first; the dict preserves the order in which
    # the values are read. ``static_routing=True`` is always passed here.
    moe_kwargs = dict(
        hidden_size=hidden_size,
        ffn_hidden_size=language_adapter_config.ffn_hidden_size,
        hidden_act=hidden_act,
        dtype=dtype,
        bias=has_mlp_bias,
        tp_group=mapping.tp_group,
        tp_size=mapping.tp_size,
        quant_mode=quant_mode,
        static_routing=True,
        moe_config=self.config.to_MOE_config(),
        mapping=mapping,
    )
    self.layers = MOE(**moe_kwargs)