EstimateMessageTokens returns a rough token-count estimate for a single chat message based on its text length. This is intentionally conservative (overestimates) so that proactive compaction fires before we hit the limit. The estimate accounts for message content, multi-content text parts, reasoning content, tool call arguments, and a small per-message overhead for role/metadata tokens.
(msg *chat.Message)
| 39 | // reasoning content, tool call arguments, and a small per-message overhead |
| 40 | // for role/metadata tokens. |
| 41 | func EstimateMessageTokens(msg *chat.Message) int64 { |
| 42 | // charsPerToken: average characters per token. 4 is a widely-used |
| 43 | // heuristic for English; slightly overestimates for code/JSON (~3.5). |
| 44 | const charsPerToken = 4 |
| 45 | |
| 46 | // perMessageOverhead: role, ToolCallID, delimiters, etc. |
| 47 | const perMessageOverhead = 5 |
| 48 | |
| 49 | var chars int |
| 50 | chars += len(msg.Content) |
| 51 | for _, part := range msg.MultiContent { |
| 52 | chars += len(part.Text) |
| 53 | } |
| 54 | chars += len(msg.ReasoningContent) |
| 55 | for _, tc := range msg.ToolCalls { |
| 56 | chars += len(tc.Function.Arguments) |
| 57 | chars += len(tc.Function.Name) |
| 58 | } |
| 59 | |
| 60 | if chars == 0 { |
| 61 | return perMessageOverhead |
| 62 | } |
| 63 | return int64(chars/charsPerToken) + perMessageOverhead |
| 64 | } |
| 65 | |
| 66 | // SplitIndexForKeep walks messages from the end and returns the earliest |
| 67 | // index whose suffix fits in maxTokens, snapping to user/assistant |
no outgoing calls