OnFirstComplete registers a callback that fires once after the first tracked inference call completes. This is used to release the initial in-flight reservation (set during model load) after the triggering request finishes, so that the in-flight count returns to 0 when the model is idle.
(fn func())
| 61 | // reservation (set during model load) after the triggering request finishes, |
| 62 | // so that the in-flight count returns to 0 when the model is idle. A later call replaces any previously registered callback. NOTE(review): fn is stored with a plain assignment and no visible locking; presumably it must be registered before requests are tracked — confirm. |
| 63 | func (c *InFlightTrackingClient) OnFirstComplete(fn func()) { |
| 64 | c.onFirstComplete = fn |
| 65 | } |
| 66 | |
| 67 | func (c *InFlightTrackingClient) track(ctx context.Context) func() { |
| 68 | if err := c.registry.IncrementInFlight(ctx, c.nodeID, c.modelName, c.replicaIndex); err != nil { |
no outgoing calls
no test coverage detected