(s string)
| 418 | } |
| 419 | |
| 420 | func (ml *ModelLoader) checkIsLoaded(s string) *Model { |
| 421 | m, ok := ml.store.Get(s) |
| 422 | if !ok { |
| 423 | return nil |
| 424 | } |
| 425 | |
| 426 | xlog.Debug("Model already loaded in memory", "model", s) |
| 427 | |
| 428 | // Skip the gRPC health check if the model was recently verified. |
| 429 | // This avoids serializing concurrent requests behind ml.mu while each |
| 430 | // one does a network round-trip (especially costly in distributed mode). |
| 431 | if m.IsRecentlyHealthy() { |
| 432 | xlog.Debug("Model health check cached, skipping gRPC probe", "model", s) |
| 433 | return m |
| 434 | } |
| 435 | |
| 436 | client := m.GRPC(false, ml.wd) |
| 437 | |
| 438 | xlog.Debug("Checking model availability", "model", s) |
| 439 | cTimeout, cancel := context.WithTimeout(context.Background(), 2*time.Minute) |
| 440 | defer cancel() |
| 441 | |
| 442 | alive, err := client.HealthCheck(cTimeout) |
| 443 | if !alive { |
| 444 | xlog.Warn("GRPC Model not responding", "error", err) |
| 445 | xlog.Warn("Deleting the process in order to recreate it") |
| 446 | process := m.Process() |
| 447 | if process == nil { |
| 448 | // Remote/distributed model — no local process to check. |
| 449 | // Only evict on definitive connection errors (node is down). |
| 450 | // Timeouts may mean the node is busy, so keep the model cached. |
| 451 | if isConnectionError(err) { |
| 452 | xlog.Warn("Remote model unreachable (connection error), removing from cache", "model", s, "error", err) |
| 453 | if delErr := ml.deleteProcess(s); delErr != nil { |
| 454 | xlog.Error("error cleaning up remote model", "error", delErr, "model", s) |
| 455 | } |
| 456 | return nil |
| 457 | } |
| 458 | xlog.Warn("Remote model health check failed (possible timeout), keeping cached", "model", s, "error", err) |
| 459 | return m |
| 460 | } |
| 461 | if !process.IsAlive() { |
| 462 | xlog.Debug("GRPC Process is not responding", "model", s) |
| 463 | // stop and delete the process, this forces to re-load the model and re-create again the service |
| 464 | err := ml.deleteProcess(s) |
| 465 | if err != nil { |
| 466 | xlog.Error("error stopping process", "error", err, "process", s) |
| 467 | } |
| 468 | return nil |
| 469 | } |
| 470 | } |
| 471 | |
| 472 | m.MarkHealthy() |
| 473 | return m |
| 474 | } |
no test coverage detected