MCP · copy · Index your code
hub / github.com/microsoft/BitNet / main

Function main

utils/convert-ms-to-gguf-bitnet.py:1645–1751  ·  view source on GitHub ↗
(args_in: list[str] | None = None)

Source from the content-addressed store, hash-verified

1643
1644
1645def main(args_in: list[str] | None = None) -> None:
1646 output_choices = ["f32", "f16", "i2"]
1647 if sys.byteorder == "little":
1648 # We currently only support Q8_0 output on little endian systems.
1649 output_choices.append("q8_0")
1650 parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
1651 parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
1652 parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
1653 parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
1654 parser.add_argument("--no-vocab", action="store_true", help="store model without the vocab")
1655 parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
1656 parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
1657 parser.add_argument("--vocab-type", help="vocab types to try in order, choose from 'spm', 'bpe', 'hfft' (default: spm,hfft)", default="spm,hfft")
1658 parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
1659 parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
1660 parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
1661 parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default=DEFAULT_CONCURRENCY)
1662 parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
1663 parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
1664 parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
1665 parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
1666
1667 args = parser.parse_args(args_in)
1668
1669 if args.verbose:
1670 logging.basicConfig(level=logging.DEBUG)
1671 elif args.dump_single or args.dump:
1672 # Avoid printing anything besides the dump output
1673 logging.basicConfig(level=logging.WARNING)
1674 else:
1675 logging.basicConfig(level=logging.INFO)
1676
1677 if args.no_vocab and args.vocab_only:
1678 raise ValueError("--vocab-only does not make sense with --no-vocab")
1679
1680 if args.dump_single:
1681 model_plus = lazy_load_file(args.model)
1682 do_dump_model(model_plus)
1683 return
1684
1685 if not args.vocab_only:
1686 model_plus = load_some_model(args.model)
1687 else:
1688 model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
1689
1690 if args.dump:
1691 do_dump_model(model_plus)
1692 return
1693
1694 endianess = gguf.GGUFEndian.LITTLE
1695 if args.big_endian:
1696 endianess = gguf.GGUFEndian.BIG
1697
1698 params = Params.load(model_plus)
1699 if params.n_ctx == -1:
1700 if args.ctx is None:
1701 msg = """\
1702 The model doesn't have a context size, and you didn't specify one with --ctx

Callers 1

Calls 13

load_vocab (Method) · 0.95
lazy_load_file (Function) · 0.70
do_dump_model (Function) · 0.70
load_some_model (Function) · 0.70
ModelPlus (Class) · 0.70
VocabFactory (Class) · 0.70
convert_model_names (Function) · 0.70
pick_output_type (Function) · 0.70
convert_to_output_type (Function) · 0.70
default_outfile (Function) · 0.70
load (Method) · 0.45
write_vocab_only (Method) · 0.45

Tested by

no test coverage detected