RunLLM is the default LLM-based summarization strategy. It clones the parent agent's model with summary-friendly options, builds a fresh compaction agent + child session, hands the work to [LLMArgs.RunAgent], and returns the produced summary together with the kept-tail boundary the runtime needs to apply it.
(ctx context.Context, args LLMArgs)
| 139 | // should treat that as "compaction was a no-op" and skip the apply |
| 140 | // step. |
| 141 | func RunLLM(ctx context.Context, args LLMArgs) (result *Result, err error) { |
| 142 | // One INTERNAL `compaction` span covers the LLM-driven summarization |
| 143 | // strategy end-to-end. The inner LLM call gets its own `chat {model}` |
| 144 | // CLIENT child span via the provider decorator, so this parent span |
| 145 | // is a useful aggregate boundary (context limit, summary tokens, |
| 146 | // outcome) without duplicating per-call timing data. |
| 147 | ctx, span := otel.Tracer("github.com/docker/docker-agent/pkg/runtime/compactor").Start( |
| 148 | ctx, |
| 149 | "compaction", |
| 150 | trace.WithSpanKind(trace.SpanKindInternal), |
| 151 | trace.WithAttributes( |
| 152 | attribute.Int64("cagent.compaction.context_limit", args.ContextLimit), |
| 153 | ), |
| 154 | ) |
| 155 | defer func() { |
| 156 | if err != nil { |
| 157 | span.RecordError(err) |
| 158 | span.SetStatus(codes.Error, err.Error()) |
| 159 | } |
| 160 | if result != nil { |
| 161 | // `Result.InputTokens` actually holds the compaction |
| 162 | // sub-session's *output* token count (the summary length) |
| 163 | // per the field's doc — name the span attribute by what the |
| 164 | // value is, not by what the source struct field is named. |
| 165 | span.SetAttributes( |
| 166 | attribute.Int("cagent.compaction.summary_output_tokens", int(result.InputTokens)), |
| 167 | attribute.Float64("cagent.compaction.cost", result.Cost), |
| 168 | attribute.Int("cagent.compaction.first_kept_entry", result.FirstKeptEntry), |
| 169 | ) |
| 170 | } |
| 171 | span.End() |
| 172 | }() |
| 173 | |
| 174 | if args.RunAgent == nil { |
| 175 | return nil, errors.New("compactor: RunAgent is required") |
| 176 | } |
| 177 | if args.Agent == nil { |
| 178 | return nil, errors.New("compactor: Agent is required") |
| 179 | } |
| 180 | if args.ContextLimit <= 0 { |
| 181 | return nil, errors.New("compactor: ContextLimit must be > 0") |
| 182 | } |
| 183 | // A dedicated compaction model (when configured) generates the summary; |
| 184 | // otherwise the summary runs on the agent's own model. ContextLimit was |
| 185 | // resolved against this same model by the runtime, so the token budgets |
| 186 | // below are scaled to the window that will actually serve the call. |
| 187 | baseModel := args.Agent.CompactionModel() |
| 188 | if baseModel == nil { |
| 189 | baseModel = args.Agent.Model(ctx) |
| 190 | } |
| 191 | if baseModel == nil { |
| 192 | return nil, errors.New("compactor: agent has no model") |
| 193 | } |
| 194 | |
| 195 | summaryModel := provider.CloneWithOptions(ctx, baseModel, |
| 196 | options.WithStructuredOutput(nil), |
| 197 | options.WithMaxTokens(summaryTokenBudget(args.ContextLimit)), |
| 198 | ) |