| 1136 | |
| 1137 | |
| 1138 | def parse_args() -> argparse.Namespace: |
| 1139 | parser = argparse.ArgumentParser(description='Benchmark /v1/chat/completions with eval JSONL datasets.') |
| 1140 | parser.add_argument( |
| 1141 | '--base-url', |
| 1142 | default='http://127.0.0.1:23333/v1', |
| 1143 | help='OpenAI-compatible API base URL. Requests go to /v1/chat/completions.', |
| 1144 | ) |
| 1145 | parser.add_argument('--api-key', default='', help='Bearer token used for /v1/models and chat requests.') |
| 1146 | parser.add_argument( |
| 1147 | '--input-ids', |
| 1148 | action='store_true', |
| 1149 | help='Pre-tokenize prompts client-side (apply_chat_template) and send input_ids with do_preprocess=false, ' |
| 1150 | 'matching benchmark_generate.py and POST /generate.', |
| 1151 | ) |
| 1152 | parser.add_argument( |
| 1153 | '--model-path', |
| 1154 | default='', |
| 1155 | help='Tokenizer/model path for --input-ids. Defaults to the id from /v1/models.', |
| 1156 | ) |
| 1157 | parser.add_argument( |
| 1158 | '--trust-remote-code', |
| 1159 | action='store_true', |
| 1160 | help='Pass trust_remote_code=True when loading the tokenizer for --input-ids.', |
| 1161 | ) |
| 1162 | parser.add_argument( |
| 1163 | '--dataset-dir', |
| 1164 | type=Path, |
| 1165 | default=Path('./workspace/oc_data'), |
| 1166 | help='Directory containing eval JSONL files. Each file stem is used as the dataset name.', |
| 1167 | ) |
| 1168 | parser.add_argument( |
| 1169 | '--dataset-files', |
| 1170 | type=Path, |
| 1171 | nargs='*', |
| 1172 | help='Explicit JSONL files to benchmark. Overrides dataset discovery from --dataset-dir.', |
| 1173 | ) |
| 1174 | parser.add_argument( |
| 1175 | '--datasets', |
| 1176 | help='Comma-separated dataset names or filename-stem prefixes, e.g. "bbeh" matches bbeh*.jsonl.', |
| 1177 | ) |
| 1178 | parser.add_argument('--num-prompts', type=int, help='Maximum number of sampled prompts from dataset.') |
| 1179 | parser.add_argument('--shuffle', action='store_true', help='Shuffle each dataset before applying --num-prompts.') |
| 1180 | parser.add_argument('--seed', type=int, default=1, help='Random seed for shuffling and request-rate scheduling.') |
| 1181 | parser.add_argument( |
| 1182 | '--mode', |
| 1183 | choices=['concurrency', 'request-rate'], |
| 1184 | default='concurrency', |
| 1185 | help='Benchmark mode: closed-loop concurrency or open-loop request rate.', |
| 1186 | ) |
| 1187 | parser.add_argument( |
| 1188 | '--levels', |
| 1189 | nargs='+', |
| 1190 | type=int, |
| 1191 | default=[1, 16, 32, 64, 128, 256, 512], |
| 1192 | help='Space-separated sweep values. Interpreted as concurrency levels or request rates based on --mode.', |
| 1193 | ) |
| 1194 | parser.add_argument('--repeats', type=int, default=1, help='Number of times to repeat each dataset/level run.') |
| 1195 | parser.add_argument( |