UpgradeBackend sends a backend.upgrade request-reply to a worker node. The worker stops every live process for this backend, force-reinstalls from the gallery (overwriting the on-disk artifact), and replies. The next routine InstallBackend call spawns a fresh process with the new binary - upgrade it
(nodeID, backendType, galleriesJSON, uri, name, alias string, replicaIndex int, opID string, onProgress func(messaging.BackendInstallProgressEvent))
| 200 | // (default 15m). Real-world worst case observed: 8-10 minutes for large |
| 201 | // CUDA-l4t backend images on Jetson over WiFi. |
| 202 | func (a *RemoteUnloaderAdapter) UpgradeBackend(nodeID, backendType, galleriesJSON, uri, name, alias string, replicaIndex int, opID string, onProgress func(messaging.BackendInstallProgressEvent)) (*messaging.BackendUpgradeReply, error) { |
| 203 | subject := messaging.SubjectNodeBackendUpgrade(nodeID) |
| 204 | xlog.Info("Sending NATS backend.upgrade", "nodeID", nodeID, "backend", backendType, "replica", replicaIndex, "opID", opID) |
| 205 | |
| 206 | sub := a.subscribeProgress(nodeID, opID, onProgress) |
| 207 | |
| 208 | reply, err := messaging.RequestJSON[messaging.BackendUpgradeRequest, messaging.BackendUpgradeReply](a.nats, subject, messaging.BackendUpgradeRequest{ |
| 209 | Backend: backendType, |
| 210 | BackendGalleries: galleriesJSON, |
| 211 | URI: uri, |
| 212 | Name: name, |
| 213 | Alias: alias, |
| 214 | ReplicaIndex: int32(replicaIndex), |
| 215 | OpID: opID, |
| 216 | }, a.upgradeTimeout) |
| 217 | |
| 218 | if sub != nil { |
| 219 | _ = sub.Unsubscribe() |
| 220 | } |
| 221 | |
| 222 | if err != nil && isNATSTimeout(err) { |
| 223 | return nil, fmt.Errorf("%w (subject=%s nodeID=%s backend=%s): %v", |
| 224 | galleryop.ErrWorkerStillInstalling, subject, nodeID, backendType, err) |
| 225 | } |
| 226 | return reply, err |
| 227 | } |
| 228 | |
| 229 | // installWithForceFallback is the rolling-update fallback used by |
| 230 | // DistributedBackendManager.UpgradeBackend when backend.upgrade returns |
nothing calls this directly
no test coverage detected