listGatewayModels returns the model IDs served by the configured models gateway, using the runtime's cache when fresh.
(ctx context.Context)
| 37 | // listGatewayModels returns the model IDs served by the configured models |
| 38 | // gateway, using the runtime's cache when fresh. |
| 39 | func (r *LocalRuntime) listGatewayModels(ctx context.Context) ([]string, error) { |
| 40 | now := time.Now |
| 41 | if r.now != nil { |
| 42 | now = r.now |
| 43 | } |
| 44 | |
| 45 | c := &r.gatewayModels |
| 46 | |
| 47 | readFresh := func() (ids []string, ok bool, err error) { |
| 48 | c.mu.Lock() |
| 49 | defer c.mu.Unlock() |
| 50 | if !c.fetchedAt.IsZero() && now().Sub(c.fetchedAt) < gatewayModelsTTL { |
| 51 | return c.ids, true, c.err |
| 52 | } |
| 53 | return nil, false, nil |
| 54 | } |
| 55 | |
| 56 | if ids, ok, err := readFresh(); ok { |
| 57 | slog.DebugContext(ctx, "Gateway model discovery cache hit", "models", len(ids), "error", err) |
| 58 | return ids, err |
| 59 | } |
| 60 | |
| 61 | start := time.Now() |
| 62 | v, err, _ := c.sf.Do("models", func() (any, error) { |
| 63 | // Double-check the cache now that we hold the in-flight slot: a |
| 64 | // caller that read a stale cache right before a concurrent |
| 65 | // singleflight completed would otherwise trigger a redundant |
| 66 | // fetch immediately after the cache was populated. |
| 67 | if ids, ok, err := readFresh(); ok { |
| 68 | return ids, err |
| 69 | } |
| 70 | |
| 71 | ids, err := modelsgateway.ListModels(ctx, r.modelSwitcherCfg.ModelsGateway, r.modelSwitcherCfg.EnvProvider) |
| 72 | if err != nil && ctx.Err() != nil { |
| 73 | return ids, err |
| 74 | } |
| 75 | c.mu.Lock() |
| 76 | defer c.mu.Unlock() |
| 77 | c.ids, c.err, c.fetchedAt = ids, err, now() |
| 78 | return ids, err |
| 79 | }) |
| 80 | if err != nil { |
| 81 | slog.DebugContext(ctx, "Gateway model discovery fetch completed", "duration", time.Since(start), "error", err) |
| 82 | return nil, err |
| 83 | } |
| 84 | ids := v.([]string) |
| 85 | slog.DebugContext(ctx, "Gateway model discovery fetch completed", "duration", time.Since(start), "models", len(ids)) |
| 86 | return ids, nil |
| 87 | } |
| 88 | |
| 89 | // buildGatewayChoices builds ModelChoice entries from the models served by |
| 90 | // the configured gateway, deduplicated against the explicitly configured |