(args_in: list[str] | None = None)
| 1643 | |
| 1644 | |
| 1645 | def main(args_in: list[str] | None = None) -> None: |
| 1646 | output_choices = ["f32", "f16", "i2"] |
| 1647 | if sys.byteorder == "little": |
| 1648 | # We currently only support Q8_0 output on little endian systems. |
| 1649 | output_choices.append("q8_0") |
| 1650 | parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file") |
| 1651 | parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model") |
| 1652 | parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file") |
| 1653 | parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab") |
| 1654 | parser.add_argument("--no-vocab", action="store_true", help="store model without the vocab") |
| 1655 | parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)") |
| 1656 | parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file") |
| 1657 | parser.add_argument("--vocab-type", help="vocab types to try in order, choose from 'spm', 'bpe', 'hfft' (default: spm,hfft)", default="spm,hfft") |
| 1658 | parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input") |
| 1659 | parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)") |
| 1660 | parser.add_argument("--ctx", type=int, help="model training context (default: based on input)") |
| 1661 | parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default=DEFAULT_CONCURRENCY) |
| 1662 | parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine") |
| 1663 | parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides") |
| 1664 | parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing") |
| 1665 | parser.add_argument("--verbose", action="store_true", help="increase output verbosity") |
| 1666 | |
| 1667 | args = parser.parse_args(args_in) |
| 1668 | |
| 1669 | if args.verbose: |
| 1670 | logging.basicConfig(level=logging.DEBUG) |
| 1671 | elif args.dump_single or args.dump: |
| 1672 | # Avoid printing anything besides the dump output |
| 1673 | logging.basicConfig(level=logging.WARNING) |
| 1674 | else: |
| 1675 | logging.basicConfig(level=logging.INFO) |
| 1676 | |
| 1677 | if args.no_vocab and args.vocab_only: |
| 1678 | raise ValueError("--vocab-only does not make sense with --no-vocab") |
| 1679 | |
| 1680 | if args.dump_single: |
| 1681 | model_plus = lazy_load_file(args.model) |
| 1682 | do_dump_model(model_plus) |
| 1683 | return |
| 1684 | |
| 1685 | if not args.vocab_only: |
| 1686 | model_plus = load_some_model(args.model) |
| 1687 | else: |
| 1688 | model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None) |
| 1689 | |
| 1690 | if args.dump: |
| 1691 | do_dump_model(model_plus) |
| 1692 | return |
| 1693 | |
| 1694 | endianess = gguf.GGUFEndian.LITTLE |
| 1695 | if args.big_endian: |
| 1696 | endianess = gguf.GGUFEndian.BIG |
| 1697 | |
| 1698 | params = Params.load(model_plus) |
| 1699 | if params.n_ctx == -1: |
| 1700 | if args.ctx is None: |
| 1701 | msg = """\ |
| 1702 | The model doesn't have a context size, and you didn't specify one with --ctx |
no test coverage detected