UnloadModelOnNode sends a model.unload request to a specific node. The worker calls gRPC Free() to release GPU memory.
(nodeID, modelName string)
| 293 | // UnloadModelOnNode sends a model.unload request to a specific node. |
| 294 | // The worker calls gRPC Free() to release GPU memory. |
| 295 | func (a *RemoteUnloaderAdapter) UnloadModelOnNode(nodeID, modelName string) error { |
| 296 | subject := messaging.SubjectNodeModelUnload(nodeID) |
| 297 | xlog.Info("Sending NATS model.unload", "nodeID", nodeID, "model", modelName) |
| 298 | |
| 299 | reply, err := messaging.RequestJSON[messaging.ModelUnloadRequest, messaging.ModelUnloadReply](a.nats, subject, messaging.ModelUnloadRequest{ModelName: modelName}, 30*time.Second) |
| 300 | if err != nil { |
| 301 | return err |
| 302 | } |
| 303 | if !reply.Success { |
| 304 | return fmt.Errorf("model.unload on node %s: %s", nodeID, reply.Error) |
| 305 | } |
| 306 | return nil |
| 307 | } |
| 308 | |
| 309 | // DeleteModelFiles sends model.delete to all nodes that have the model cached. |
| 310 | // This removes model files from worker disks. |
nothing calls this directly
no test coverage detected