(model_path:Path, model_params:Dict[str, Union[int, float]])
| 30 | return Path(os.path.dirname(model)) |
| 31 | |
def load_model(model_path:Path, model_params:Dict[str, Union[int, float]]) -> Transformer:
    """Build a Transformer, give its attention projections a bias, and load weights.

    The model is constructed from ``model_params`` with ``nn.Linear`` as its
    linear layer type. Every layer's ``attention.wq`` / ``wk`` / ``wv`` is then
    replaced by a freshly created ``nn.Linear`` with ``bias=True``, and finally
    the weights referenced by ``model.safetensors.index.json`` inside
    ``model_path`` are loaded into the model.

    Args:
        model_path: Directory that contains ``model.safetensors.index.json``.
        model_params: Keyword arguments forwarded to ``Transformer``; this
            function itself reads the ``"dim"``, ``"n_heads"``,
            ``"n_kv_heads"`` and ``"n_layers"`` entries.

    Returns:
        The ``Transformer`` instance after ``load_state_dict`` has run on it.
    """
    model = Transformer(**model_params, linear=nn.Linear)

    # Swap the q/k/v projections of every layer for bias-enabled replacements.
    # The layers are mutated in place and collected again so that
    # ``model.layers`` ends up as a plain list of the same layer objects.
    rebuilt_layers = []
    for block in model.layers:
        attn = block.attention
        head_dim = model_params["dim"] // model_params["n_heads"]
        attn.wq = nn.Linear(model_params["dim"], model_params["n_heads"] * head_dim, bias=True)
        attn.wk = nn.Linear(model_params["dim"], model_params["n_kv_heads"] * head_dim, bias=True)
        attn.wv = nn.Linear(model_params["dim"], model_params["n_kv_heads"] * head_dim, bias=True)
        rebuilt_layers.append(block)
    model.layers = rebuilt_layers

    # Read the checkpoint via its index file and convert it for this model.
    # The calls stay nested on purpose: no extra references to intermediate
    # state dicts are kept alive while the weights are consumed below.
    index_file = str(model_path / "model.safetensors.index.json")
    weights = fix_bf16(
        convert_from_huggingface(
            load(index_file),
            model_params["n_layers"],
            model_params["n_heads"],
            model_params["n_kv_heads"],
            permute_layers=False,
        )
    )

    # Non-strict load with consume=True.
    # NOTE(review): presumably tolerates key mismatches and releases entries
    # as they are assigned - confirm against load_state_dict.
    load_state_dict(model, weights, strict=False, consume=True)
    return model
| 52 | |
| 53 | |
| 54 | if __name__ == "__main__": |
no test coverage detected
searching dependent graphs…