()
| 1092 | return args |
| 1093 | |
| 1094 | def main(): |
| 1095 | global server_config |
| 1096 | global cepo_config |
| 1097 | global request_batcher |
| 1098 | global conversation_logger |
| 1099 | # Call this function at the start of main() |
| 1100 | |
| 1101 | # Load plugins first so they're available in argument parser |
| 1102 | load_plugins() |
| 1103 | |
| 1104 | args = parse_args() |
| 1105 | # Update server_config with all argument values |
| 1106 | server_config.update(vars(args)) |
| 1107 | |
| 1108 | port = server_config['port'] |
| 1109 | |
| 1110 | # Initialize request batcher if batch mode is enabled |
| 1111 | if server_config.get('batch_mode', False): |
| 1112 | logger.info(f"Batch mode enabled: size={server_config['batch_size']}, " |
| 1113 | f"wait={server_config['batch_wait_ms']}ms") |
| 1114 | request_batcher = RequestBatcher( |
| 1115 | max_batch_size=server_config['batch_size'], |
| 1116 | max_wait_ms=server_config['batch_wait_ms'], |
| 1117 | enable_logging=True |
| 1118 | ) |
| 1119 | |
| 1120 | # Set up the batch processor function |
| 1121 | def process_batch_requests(batch_requests): |
| 1122 | """ |
| 1123 | Process a batch of requests using true batching when possible |
| 1124 | |
| 1125 | Args: |
| 1126 | batch_requests: List of request data dictionaries |
| 1127 | |
| 1128 | Returns: |
| 1129 | List of response dictionaries |
| 1130 | """ |
| 1131 | import time |
| 1132 | from optillm.batching import BatchingError |
| 1133 | |
| 1134 | if not batch_requests: |
| 1135 | return [] |
| 1136 | |
| 1137 | logger.info(f"Processing batch of {len(batch_requests)} requests") |
| 1138 | |
| 1139 | # Check if we can use true batching (all requests compatible and using 'none' approach) |
| 1140 | can_use_true_batching = True |
| 1141 | first_req = batch_requests[0] |
| 1142 | |
| 1143 | # Check compatibility across all requests |
| 1144 | for req_data in batch_requests: |
| 1145 | if (req_data['stream'] or |
| 1146 | req_data['approaches'] != first_req['approaches'] or |
| 1147 | req_data['operation'] != first_req['operation'] or |
| 1148 | req_data['model'] != first_req['model']): |
| 1149 | can_use_true_batching = False |
| 1150 | break |
| 1151 |
no test coverage detected