(lm_config, from_weight='pretrain', tokenizer_path='../model', save_dir='../out', device='cuda')
| 117 | |
| 118 | |
def init_model(lm_config, from_weight='pretrain', tokenizer_path='../model', save_dir='../out', device='cuda'):
    """Create the tokenizer and model, optionally restoring weights from a checkpoint.

    Args:
        lm_config: Model configuration. ``use_moe`` and ``hidden_size`` are
            read here to build the checkpoint file name; the object is also
            passed to ``MiniMindForCausalLM`` and ``get_model_params``.
        from_weight: Checkpoint name prefix. The literal string ``'none'``
            skips checkpoint loading entirely.
        tokenizer_path: Location passed to ``AutoTokenizer.from_pretrained``.
        save_dir: Directory that holds the checkpoint file.
        device: Used as ``map_location`` when reading the checkpoint and as
            the target of the final ``model.to(...)``.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been moved to ``device``.
    """
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    model = MiniMindForCausalLM(lm_config)

    if from_weight != 'none':
        moe_suffix = '_moe' if lm_config.use_moe else ''
        weight_path = f'{save_dir}/{from_weight}_{lm_config.hidden_size}{moe_suffix}.pth'
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources (consider weights_only=True if the installed
        # torch version supports it).
        weights = torch.load(weight_path, map_location=device)
        # strict=False tolerates key mismatches, so surface them explicitly:
        # otherwise a checkpoint that does not fit the architecture would
        # silently leave parameters at their random initialisation.
        load_result = model.load_state_dict(weights, strict=False)
        if load_result.missing_keys:
            Logger(f'{len(load_result.missing_keys)} missing keys when loading {weight_path}: '
                   f'{load_result.missing_keys}')
        if load_result.unexpected_keys:
            Logger(f'{len(load_result.unexpected_keys)} unexpected keys when loading {weight_path}: '
                   f'{load_result.unexpected_keys}')

    get_model_params(model, lm_config)
    trainable_millions = sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6
    Logger(f'Trainable Params: {trainable_millions:.3f}M')
    return model.to(device), tokenizer
| 132 | |
| 133 | |
| 134 | class SkipBatchSampler(Sampler): |
no test coverage detected