()
| 241 | |
| 242 | |
| 243 | def parse_args(): |
| 244 | parser = argparse.ArgumentParser(description='Benchmark the request throughput of lmdeploy ' |
| 245 | 'in localhost', |
| 246 | formatter_class=DefaultsAndTypesHelpFormatter) |
| 247 | parser.add_argument('dataset', type=str, help='the path dataset') |
| 248 | parser.add_argument('model_path', |
| 249 | type=str, |
| 250 | help='the path of the model in localhost or ' |
| 251 | 'the repo_id of the model in huggingface.co') |
| 252 | parser.add_argument('-c', |
| 253 | '--concurrency', |
| 254 | type=int, |
| 255 | help='Number of working threads to process the sampled prompts', |
| 256 | default=256) |
| 257 | parser.add_argument('-n', '--num-prompts', type=int, help='Number of prompts to process', default=5000) |
| 258 | parser.add_argument('--no-stream-output', action='store_true', help='Use stream output') |
| 259 | parser.add_argument('--skip-tokenize', action='store_true', help='Pre-tokenize input prompts before starting') |
| 260 | parser.add_argument('--skip-detokenize', action='store_true', help='Skip detokenizing output tokens') |
| 261 | parser.add_argument('--cancel-rate', type=float, help='Possibility of a request being canceled', default=0) |
| 262 | parser.add_argument('--use-uvloop', action='store_true') |
| 263 | parser.add_argument('--csv', type=str, help='Where to save the result.', default='./profile_throughput.csv') |
| 264 | parser.add_argument('--seed', type=int, default=0, help='Seed used in sampling prompts from dataset') |
| 265 | parser.add_argument('--distributed-executor-backend', |
| 266 | type=str, |
| 267 | default=None, |
| 268 | choices=['uni', 'mp', 'ray'], |
| 269 | help='backend of executor backend') |
| 270 | parser.add_argument('--dataset-name', |
| 271 | type=str, |
| 272 | default='sharegpt', |
| 273 | choices=['sharegpt', 'random'], |
| 274 | help='Name of the dataset to benchmark on.') |
| 275 | parser.add_argument( |
| 276 | '--sharegpt-output-len', |
| 277 | type=int, |
| 278 | default=None, |
| 279 | help='Output length for each request. Overrides the output length ' |
| 280 | 'from the ShareGPT dataset.', |
| 281 | ) |
| 282 | parser.add_argument( |
| 283 | '--random-input-len', |
| 284 | type=int, |
| 285 | help='Number of input tokens per request, used only for random ' |
| 286 | 'dataset.', |
| 287 | ) |
| 288 | parser.add_argument( |
| 289 | '--random-output-len', |
| 290 | type=int, |
| 291 | help='Number of output tokens per request, used only for random ' |
| 292 | 'dataset.', |
| 293 | ) |
| 294 | parser.add_argument( |
| 295 | '--random-range-ratio', |
| 296 | type=float, |
| 297 | default=0.0, |
| 298 | help='Range of sampled ratio of input/output length, ' |
| 299 | 'used only for random dataset.', |
| 300 | ) |
no test coverage detected