| 39 | |
| 40 | |
| 41 | class BackendServicer(backend_pb2_grpc.BackendServicer): |
def __init__(self):
    """Create a servicer that starts out with no tracked jobs."""
    # Registry of jobs, keyed by job_id; each value is an ActiveJob.
    self.jobs = dict()
| 44 | |
def Health(self, request, context):
    """Answer a health probe with a fixed ``OK`` reply.

    Neither ``request`` nor ``context`` is inspected.
    """
    reply = backend_pb2.Reply(message=b"OK")
    return reply
| 47 | |
def LoadModel(self, request, context):
    """Acknowledge a LoadModel call without doing any work.

    Nothing is loaded here: the request is simply accepted, and the
    actual work is deferred to ``StartQuantization``.
    """
    acknowledgement = backend_pb2.Result(success=True, message="OK")
    return acknowledgement
| 51 | |
def StartQuantization(self, request, context):
    """Register a quantization job and launch it on a background thread.

    The call returns immediately with a ``QuantizationJobResult``:

    * ``success=False`` when ``request.job_id`` is already present in
      ``self.jobs`` (no new thread is started);
    * ``success=True`` after a new ``ActiveJob`` has been stored under
      that id and its daemon worker thread, which runs
      ``_do_quantization``, has been started.
    """
    job_id = request.job_id

    if job_id in self.jobs:
        # Duplicate id: report the failure and leave the existing job alone.
        started = False
        note = f"Job {job_id} already exists"
    else:
        new_job = ActiveJob(job_id)
        self.jobs[job_id] = new_job

        # Daemon thread, so a running job never blocks interpreter exit.
        worker = threading.Thread(
            target=self._do_quantization,
            args=(new_job, request),
            daemon=True,
        )
        new_job.thread = worker
        worker.start()

        started = True
        note = "Quantization job started"

    return backend_pb2.QuantizationJobResult(
        job_id=job_id,
        success=started,
        message=note,
    )
| 76 | |
def _send_progress(self, job, status, message, progress_percent=0.0, output_file="", extra_metrics=None):
    """Build a progress update for ``job`` and put it on its progress queue.

    Args:
        job: Object exposing ``job_id`` and a ``progress_queue`` with ``put``.
        status: Status value copied into the update.
        message: Message copied into the update.
        progress_percent: Progress value copied into the update.
        output_file: Output file value copied into the update.
        extra_metrics: Optional metrics; any falsy value (including
            ``None``) is sent as an empty dict.
    """
    metrics = extra_metrics if extra_metrics else {}
    progress_update = backend_pb2.QuantizationProgressUpdate(
        job_id=job.job_id,
        progress_percent=progress_percent,
        status=status,
        message=message,
        output_file=output_file,
        extra_metrics=metrics,
    )
    job.progress_queue.put(progress_update)
| 87 | |
| 88 | def _do_quantization(self, job, request): |
| 89 | try: |
| 90 | model = request.model |
| 91 | quant_type = request.quantization_type or "q4_k_m" |
| 92 | output_dir = request.output_dir |
| 93 | extra_options = dict(request.extra_options) if request.extra_options else {} |
| 94 | |
| 95 | os.makedirs(output_dir, exist_ok=True) |
| 96 | |
| 97 | if job.stop_event.is_set(): |
| 98 | self._send_progress(job, "stopped", "Job stopped before starting") |