Loads the base model then the (peft) adapter weights
(self, model_path: str, from_pretrained_kwargs: dict)
| 611 | return "peft" in model_path.lower() |
| 612 | |
def load_model(self, model_path: str, from_pretrained_kwargs: dict):
    """Load the base model, then apply the (peft) adapter weights to it.

    Args:
        model_path: Location of the peft adapter. Its ``PeftConfig`` names
            the base model the adapter is applied to.
        from_pretrained_kwargs: Passed through unchanged to the base
            model adapter's ``load_model``.

    Returns:
        A ``(model, tokenizer)`` tuple: the ``PeftModel`` wrapping the base
        model, and the tokenizer returned by the base model adapter.

    Raises:
        ValueError: If the base model name recorded in the adapter config
            contains "peft" (in any letter case).
    """
    from peft import PeftConfig, PeftModel

    config = PeftConfig.from_pretrained(model_path)
    base_model_path = config.base_model_name_or_path
    # Compare case-insensitively so names such as "PEFT" or "Peft" are
    # rejected too, not only the all-lowercase spelling.
    if "peft" in base_model_path.lower():
        raise ValueError(
            f"PeftModelAdapter cannot load a base model with 'peft' in the name: {config.base_model_name_or_path}"
        )

    # Basic proof of concept for loading peft adapters that share the base
    # weights. This is pretty messy because Peft re-writes the underlying
    # base model and internally stores a map of adapter layers.
    # So, to make this work we:
    #  1. Cache the first peft model loaded for a given base model.
    #  2. Call `load_adapter` on the cached model for any follow-on peft
    #     models.
    #  3. Make sure we load the adapters by the model_path. Why? This is
    #     what's accessible during inference time.
    #  4. In get_generate_stream_function, make sure we load the right
    #     adapter before doing inference. This *should* be safe when calls
    #     are blocked by the same semaphore.
    if peft_share_base_weights and base_model_path in peft_model_cache:
        model, tokenizer = peft_model_cache[base_model_path]
        # Super important: make sure we use model_path as the
        # `adapter_name`.
        model.load_adapter(model_path, adapter_name=model_path)
        return model, tokenizer

    # Either sharing is disabled or this is the first adapter seen for this
    # base model: load the base model weights through its own adapter.
    base_adapter = get_model_adapter(base_model_path)
    base_model, tokenizer = base_adapter.load_model(
        base_model_path, from_pretrained_kwargs
    )

    if not peft_share_base_weights:
        # In the normal case every peft model gets its own base weights and
        # the adapter name is left to PeftModel.from_pretrained.
        model = PeftModel.from_pretrained(base_model, model_path)
        return model, tokenizer

    # Super important: make sure we use model_path as the `adapter_name`.
    model = PeftModel.from_pretrained(
        base_model, model_path, adapter_name=model_path
    )
    # Remember the wrapped model so later adapters for the same base model
    # reuse these weights.
    peft_model_cache[base_model_path] = (model, tokenizer)
    return model, tokenizer
| 661 | |
| 662 | def get_default_conv_template(self, model_path: str) -> Conversation: |
| 663 | """Uses the conv template of the base model""" |
nothing calls this directly
no test coverage detected