Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed tokenizer and config attributes. Args: gguf_checkpoint_path (`str`): The path to the GGUF file to load return_tensors (`bool`, defaults to `False`): Wheth
(gguf_checkpoint_path, return_tensors=False)
| 639 | |
| 640 | |
def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
    """
    Load a GGUF file and return a dictionary mapping each tensor name to its weights.

    Args:
        gguf_checkpoint_path (`str`):
            The path to the GGUF file to load.
        return_tensors (`bool`, defaults to `False`):
            Currently unused: the tensors are always read and returned regardless of this value. The argument is
            kept so that existing callers keep working.

    Returns:
        `dict`: Maps every tensor name found in the file to a `torch.Tensor` (for F32 and F16 tensors) or to a
        `GGUFParameter` carrying the tensor's `quant_type` (for every other tensor).

    Raises:
        ImportError: If `is_gguf_available()` or `is_torch_available()` is false.
        ValueError: If a tensor's type is neither F32/F16 nor listed in `SUPPORTED_GGUF_QUANT_TYPES`.
    """

    if not (is_gguf_available() and is_torch_available()):
        logger.error(
            "Loading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see "
            "https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions."
        )
        raise ImportError("Please install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.")

    # Imported lazily so that the module can be imported without gguf installed.
    import gguf
    from gguf import GGUFReader

    from ..quantizers.gguf.utils import SUPPORTED_GGUF_QUANT_TYPES, GGUFParameter

    reader = GGUFReader(gguf_checkpoint_path)

    # Tensor types that are kept as plain tensors instead of being wrapped in a GGUFParameter.
    # Built once here rather than on every loop iteration.
    native_types = (gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F16)

    parsed_parameters = {}
    for tensor in reader.tensors:
        name = tensor.name
        quant_type = tensor.tensor_type

        # if the tensor is a torch supported dtype do not use GGUFParameter
        is_gguf_quant = quant_type not in native_types
        if is_gguf_quant and quant_type not in SUPPORTED_GGUF_QUANT_TYPES:
            _supported_quants_str = "\n".join(str(supported) for supported in SUPPORTED_GGUF_QUANT_TYPES)
            # `!s` forces str() on the type, exactly as the explicit str() call did.
            raise ValueError(
                f"{name} has a quantization type: {quant_type!s} which is unsupported."
                "\n\nCurrently the following quantization types are supported: \n\n"
                f"{_supported_quants_str}"
                "\n\nTo request support for this quantization type please open an issue here: https://github.com/huggingface/diffusers"
            )

        # `.copy()` gives the tensor its own buffer instead of aliasing the array held by the reader.
        weights = torch.from_numpy(tensor.data.copy())
        parsed_parameters[name] = GGUFParameter(weights, quant_type=quant_type) if is_gguf_quant else weights

    return parsed_parameters
| 690 | |
| 691 | |
| 692 | def _find_mismatched_keys(state_dict, model_state_dict, loaded_keys, ignore_mismatched_sizes): |
no test coverage detected
searching dependent graphs…