| 23 | ) |
| 24 | |
| 25 | func (ml *ModelLoader) deleteProcess(s string) error { |
| 26 | model, ok := ml.store.Get(s) |
| 27 | if !ok { |
| 28 | xlog.Debug("Model not found", "model", s) |
| 29 | return modelNotFoundErr |
| 30 | } |
| 31 | |
| 32 | retries := 1 |
| 33 | for model.GRPC(false, ml.wd).IsBusy() { |
| 34 | xlog.Debug("Model busy. Waiting.", "model", s) |
| 35 | dur := time.Duration(retries*2) * time.Second |
| 36 | if dur > retryTimeout { |
| 37 | dur = retryTimeout |
| 38 | } |
| 39 | time.Sleep(dur) |
| 40 | retries++ |
| 41 | |
| 42 | if retries > 10 && forceBackendShutdown { |
| 43 | xlog.Warn("Model is still busy after retries. Forcing shutdown.", "model", s, "retries", retries) |
| 44 | break |
| 45 | } |
| 46 | } |
| 47 | |
| 48 | xlog.Debug("Deleting process", "model", s) |
| 49 | |
| 50 | // Run unload hooks (e.g. close MCP sessions) |
| 51 | for _, hook := range ml.onUnloadHooks { |
| 52 | hook(s) |
| 53 | } |
| 54 | |
| 55 | // Free GPU resources before stopping the process to ensure VRAM is released |
| 56 | xlog.Debug("Calling Free() to release GPU resources", "model", s) |
| 57 | if err := model.GRPC(false, ml.wd).Free(context.Background()); err != nil { |
| 58 | xlog.Warn("Error freeing GPU resources", "error", err, "model", s) |
| 59 | } |
| 60 | |
| 61 | process := model.Process() |
| 62 | if process == nil { |
| 63 | // No local process — this is a remote/external backend. |
| 64 | // In distributed mode, delegate to the remote unloader to tell |
| 65 | // the backend node to free the model (GPU resources, etc.). |
| 66 | if ml.remoteUnloader != nil { |
| 67 | xlog.Debug("Delegating model unload to remote unloader", "model", s) |
| 68 | if err := ml.remoteUnloader.UnloadRemoteModel(s); err != nil { |
| 69 | xlog.Warn("Remote model unload failed", "model", s, "error", err) |
| 70 | } |
| 71 | } else { |
| 72 | xlog.Debug("No local process and no remote unloader", "model", s) |
| 73 | } |
| 74 | ml.store.Delete(s) |
| 75 | return nil |
| 76 | } |
| 77 | |
| 78 | // Mark the stop as intentional so the exit-watcher logs it as an |
| 79 | // expected stop, not a crash (signal-terminated children report -1). |
| 80 | ml.stoppingProcs.Store(process, struct{}{}) |
| 81 | err := process.Stop() |
| 82 | if err != nil { |