StartJob starts a new quantization job.
(ctx context.Context, userID string, req schema.QuantizationJobRequest)
| 171 | |
| 172 | // StartJob starts a new quantization job. |
| 173 | func (s *QuantizationService) StartJob(ctx context.Context, userID string, req schema.QuantizationJobRequest) (*schema.QuantizationJobResponse, error) { |
| 174 | s.mu.Lock() |
| 175 | defer s.mu.Unlock() |
| 176 | |
| 177 | jobID := uuid.New().String() |
| 178 | |
| 179 | backendName := req.Backend |
| 180 | if backendName == "" { |
| 181 | backendName = "llama-cpp-quantization" |
| 182 | } |
| 183 | |
| 184 | quantType := req.QuantizationType |
| 185 | if quantType == "" { |
| 186 | quantType = "q4_k_m" |
| 187 | } |
| 188 | |
| 189 | // Always use DataPath for output — not user-configurable |
| 190 | outputDir := filepath.Join(s.quantizationBaseDir(), jobID) |
| 191 | |
| 192 | // Build gRPC request |
| 193 | grpcReq := &pb.QuantizationRequest{ |
| 194 | Model: req.Model, |
| 195 | QuantizationType: quantType, |
| 196 | OutputDir: outputDir, |
| 197 | JobId: jobID, |
| 198 | ExtraOptions: req.ExtraOptions, |
| 199 | } |
| 200 | |
| 201 | // Load the quantization backend (per-job model ID so multiple jobs can run concurrently) |
| 202 | modelID := backendName + "-quantize-" + jobID |
| 203 | backendModel, err := s.modelLoader.Load( |
| 204 | model.WithBackendString(backendName), |
| 205 | model.WithModel(backendName), |
| 206 | model.WithModelID(modelID), |
| 207 | ) |
| 208 | if err != nil { |
| 209 | return nil, fmt.Errorf("failed to load backend %s: %w", backendName, err) |
| 210 | } |
| 211 | |
| 212 | // Start quantization via gRPC |
| 213 | result, err := backendModel.StartQuantization(ctx, grpcReq) |
| 214 | if err != nil { |
| 215 | return nil, fmt.Errorf("failed to start quantization: %w", err) |
| 216 | } |
| 217 | if !result.Success { |
| 218 | return nil, fmt.Errorf("quantization failed to start: %s", result.Message) |
| 219 | } |
| 220 | |
| 221 | // Track the job |
| 222 | job := &schema.QuantizationJob{ |
| 223 | ID: jobID, |
| 224 | UserID: userID, |
| 225 | Model: req.Model, |
| 226 | Backend: backendName, |
| 227 | ModelID: modelID, |
| 228 | QuantizationType: quantType, |
| 229 | Status: "queued", |
| 230 | OutputDir: outputDir, |
No direct callers of StartJob were detected.
No test coverage was detected for StartJob.