Process a batch of requests, using true batching when possible. Args: batch_requests: list of request data dictionaries. Returns: list of response dictionaries.
(batch_requests)
| 1119 | |
| 1120 | # Set up the batch processor function |
| 1121 | def process_batch_requests(batch_requests): |
| 1122 | """ |
| 1123 | Process a batch of requests using true batching when possible |
| 1124 | |
| 1125 | Args: |
| 1126 | batch_requests: List of request data dictionaries |
| 1127 | |
| 1128 | Returns: |
| 1129 | List of response dictionaries |
| 1130 | """ |
| 1131 | import time |
| 1132 | from optillm.batching import BatchingError |
| 1133 | |
| 1134 | if not batch_requests: |
| 1135 | return [] |
| 1136 | |
| 1137 | logger.info(f"Processing batch of {len(batch_requests)} requests") |
| 1138 | |
| 1139 | # Check if we can use true batching (all requests compatible and using 'none' approach) |
| 1140 | can_use_true_batching = True |
| 1141 | first_req = batch_requests[0] |
| 1142 | |
| 1143 | # Check compatibility across all requests |
| 1144 | for req_data in batch_requests: |
| 1145 | if (req_data['stream'] or |
| 1146 | req_data['approaches'] != first_req['approaches'] or |
| 1147 | req_data['operation'] != first_req['operation'] or |
| 1148 | req_data['model'] != first_req['model']): |
| 1149 | can_use_true_batching = False |
| 1150 | break |
| 1151 | |
| 1152 | # For now, implement sequential processing but with proper infrastructure |
| 1153 | # TODO: Implement true PyTorch/MLX batching in next phase |
| 1154 | responses = [] |
| 1155 | |
| 1156 | for i, req_data in enumerate(batch_requests): |
| 1157 | try: |
| 1158 | logger.debug(f"Processing batch request {i+1}/{len(batch_requests)}") |
| 1159 | |
| 1160 | # Extract request parameters |
| 1161 | system_prompt = req_data['system_prompt'] |
| 1162 | initial_query = req_data['initial_query'] |
| 1163 | client = req_data['client'] |
| 1164 | model = req_data['model'] |
| 1165 | request_config = req_data['request_config'] |
| 1166 | approaches = req_data['approaches'] |
| 1167 | operation = req_data['operation'] |
| 1168 | n = req_data['n'] |
| 1169 | stream = req_data['stream'] |
| 1170 | |
| 1171 | # Validate request |
| 1172 | if stream: |
| 1173 | raise BatchingError("Streaming requests cannot be batched") |
| 1174 | |
| 1175 | # Check if any of the approaches is 'none' |
| 1176 | contains_none = any(approach == 'none' for approach in approaches) |
| 1177 | |
| 1178 | if operation == 'SINGLE' and approaches[0] == 'none': |
Nothing calls `process_batch_requests` directly.
No test coverage was detected for this function.