Add parser for chat command.
()
| 21 | |
| 22 | @staticmethod |
| 23 | def add_parser_chat(): |
| 24 | """Add parser for list command.""" |
| 25 | parser = CLI.subparsers.add_parser('chat', |
| 26 | formatter_class=DefaultsAndTypesHelpFormatter, |
| 27 | description=CLI.chat.__doc__, |
| 28 | help=CLI.chat.__doc__) |
| 29 | parser.set_defaults(run=CLI.chat) |
| 30 | parser.add_argument('model_path', |
| 31 | type=str, |
| 32 | help='The path of a model. it could be one of the following ' |
| 33 | 'options: - i) a local directory path of a turbomind model' |
| 34 | ' which is converted by `lmdeploy convert` command or ' |
| 35 | 'download from ii) and iii). - ii) the model_id of a ' |
| 36 | 'lmdeploy-quantized model hosted inside a model repo on ' |
| 37 | 'huggingface.co, such as "internlm/internlm-chat-20b-4bit",' |
| 38 | ' "lmdeploy/llama2-chat-70b-4bit", etc. - iii) the model_id' |
| 39 | ' of a model hosted inside a model repo on huggingface.co,' |
| 40 | ' such as "internlm/internlm-chat-7b", "qwen/qwen-7b-chat "' |
| 41 | ', "baichuan-inc/baichuan2-7b-chat" and so on') |
| 42 | # common args |
| 43 | ArgumentHelper.backend(parser) |
| 44 | # chat template args |
| 45 | ArgumentHelper.chat_template(parser) |
| 46 | # model args |
| 47 | ArgumentHelper.revision(parser) |
| 48 | ArgumentHelper.download_dir(parser) |
| 49 | ArgumentHelper.trust_remote_code(parser) |
| 50 | |
| 51 | # pytorch engine args |
| 52 | pt_group = parser.add_argument_group('PyTorch engine arguments') |
| 53 | ArgumentHelper.adapters(pt_group) |
| 54 | ArgumentHelper.device(pt_group) |
| 55 | ArgumentHelper.eager_mode(pt_group) |
| 56 | ArgumentHelper.dllm_block_length(pt_group) |
| 57 | # common engine args |
| 58 | dtype_act = ArgumentHelper.dtype(pt_group) |
| 59 | tp_act = ArgumentHelper.tp(pt_group) |
| 60 | session_len_act = ArgumentHelper.session_len(pt_group) |
| 61 | cache_max_entry_act = ArgumentHelper.cache_max_entry_count(pt_group) |
| 62 | prefix_caching_act = ArgumentHelper.enable_prefix_caching(pt_group) |
| 63 | quant_policy = ArgumentHelper.quant_policy(pt_group) |
| 64 | |
| 65 | # turbomind args |
| 66 | tb_group = parser.add_argument_group('TurboMind engine arguments') |
| 67 | # common engine args |
| 68 | tb_group._group_actions.append(dtype_act) |
| 69 | tb_group._group_actions.append(tp_act) |
| 70 | tb_group._group_actions.append(session_len_act) |
| 71 | tb_group._group_actions.append(cache_max_entry_act) |
| 72 | tb_group._group_actions.append(prefix_caching_act) |
| 73 | tb_group._group_actions.append(quant_policy) |
| 74 | ArgumentHelper.model_format(tb_group) |
| 75 | ArgumentHelper.rope_scaling_factor(tb_group) |
| 76 | ArgumentHelper.communicator(tb_group) |
| 77 | ArgumentHelper.cp(tb_group) |
| 78 | ArgumentHelper.async_(tb_group) |
| 79 | |
| 80 | # speculative decoding |
no test coverage detected