| 15 | |
| 16 | |
def throughput_test(config, run_config, worker_id: str = '', is_smoke: bool = False):
    """Run ``benchmark/profile_throughput.py`` once per concurrency level.

    The script is launched at concurrency 128 and then 256. Each run writes
    its CSV to ``<benchmark_path>/throughput/wk_<case>/<concurrency>.csv`` and
    its log to ``<benchmark_path>/throughput/log_<case>_<concurrency>_<ts>.log``;
    the log is attached to the allure report after every run.

    Args:
        config: Mapping read for ``model_path``, ``dataset_path`` and
            ``benchmark_path``.
        run_config: Mapping read for ``model``, ``backend``,
            ``parallel_config``, ``extra_params`` and ``env``. NOTE: it is
            modified in place when ``get_max_cache_entry`` returns a value
            (``extra_params['cache-max-entry-count']`` is set).
        worker_id: Forwarded to ``get_cuda_prefix_by_workerid`` to build the
            command prefix.
        is_smoke: When true, ``--num-prompts 100`` is used instead of
            ``--num-prompts 5000``.

    Returns:
        ``(True, 'success')`` when every run succeeds and produces its CSV;
        otherwise ``(False, reason)`` for the first failing run, where
        ``reason`` is the captured stderr or ``'result is empty'``.
    """
    model_name = run_config.get('model')
    full_model_path = os.path.join(config.get('model_path'), model_name)
    dataset = config.get('dataset_path')

    # The case name is derived before run_config is touched below.
    case_tag = get_case_str_by_config(run_config)
    output_root = os.path.join(config.get('benchmark_path'), 'throughput')
    csv_dir = os.path.join(output_root, f'wk_{case_tag}')
    os.makedirs(csv_dir, exist_ok=True)

    cache_entry = get_max_cache_entry(model_name, run_config.get('backend'))
    if cache_entry is not None:
        run_config.setdefault('extra_params', {})['cache-max-entry-count'] = cache_entry

    device_prefix = get_cuda_prefix_by_workerid(worker_id, run_config.get('parallel_config'))

    # Build the CLI arguments from a copy so that dropping the
    # SERVE_ONLY_PARAMS entries does not affect the caller's run_config.
    cli_config = copy.deepcopy(run_config)
    kept_params = {}
    for key, value in cli_config.get('extra_params', {}).items():
        if key not in SERVE_ONLY_PARAMS:
            kept_params[key] = value
    cli_config['extra_params'] = kept_params
    common_cli = get_cli_common_param(cli_config)
    base_command = f'{device_prefix} python3 benchmark/profile_throughput.py {dataset} {full_model_path} {common_cli}'  # noqa

    prompt_flag = '--num-prompts 100' if is_smoke else '--num-prompts 5000'

    # Child environment: current process environment overlaid with the
    # per-case overrides from run_config['env'].
    child_env = dict(os.environ)
    child_env.update(run_config.get('env', {}))

    for concurrency in (128, 256):
        csv_file = os.path.join(csv_dir, f'{concurrency}.csv')
        stamp = time.strftime('%Y%m%d_%H%M%S')
        log_file = os.path.join(output_root, f'log_{case_tag}_{concurrency}_{stamp}.log')
        cmd_parts = [base_command, '--concurrency', str(concurrency), prompt_flag, '--csv ', csv_file]
        # strip() removes the leading blank left when the device prefix is empty.
        full_cmd = ' '.join(cmd_parts).strip()

        succeeded, stderr = execute_command_with_logging(full_cmd, log_file, env=child_env)
        allure.attach.file(log_file, name=log_file, attachment_type=allure.attachment_type.TEXT)

        if not succeeded:
            return False, stderr
        # The command reported success but left no CSV behind.
        if not os.path.isfile(csv_file):
            return False, 'result is empty'

    return True, 'success'
| 65 | |
| 66 | |
| 67 | def longtext_throughput_test(config, run_config, worker_id: str = ''): |