Load a HuggingFace model and tokenizer.
(hf_path: str, device)
| 65 | |
| 66 | |
def load_hf_model(hf_path: str, device):
    """Load a HuggingFace causal LM and its tokenizer from ``hf_path``.

    The model is loaded with ``AutoModelForCausalLM.from_pretrained``, moved
    to ``device``, switched to eval mode and wrapped in ``ModelWrapper``.
    The tokenizer is loaded from the same ``hf_path``.

    Returns:
        A ``(model, tokenizer)`` tuple, where ``model`` is the wrapped model.
    """
    print0(f"Loading HuggingFace model from: {hf_path}")

    # Function-scope import: transformers is imported only when this runs.
    from transformers import AutoModelForCausalLM

    hf_model = AutoModelForCausalLM.from_pretrained(hf_path)
    hf_model.to(device)
    hf_model.eval()

    # Paths containing "gpt2" get a 1024-token limit; all others pass None.
    if "gpt2" in hf_path:
        seq_len_cap = 1024
    else:
        seq_len_cap = None

    wrapped = ModelWrapper(hf_model, max_seq_len=seq_len_cap)
    return wrapped, HuggingFaceTokenizer.from_pretrained(hf_path)
| 78 | |
| 79 | |
| 80 | def get_hf_token_bytes(tokenizer, device="cpu"): |
no test coverage detected