FirstIndexInBudget returns the smallest index N such that messages[N:] fits within contextLimit, snapping to a user/assistant turn boundary. Used to truncate the conversation handed to the summarization model so the request itself doesn't blow the context window. When the entire slice fits within c
(messages []chat.Message, contextLimit int64)
| 107 | // no user/asst turns, it returns len(messages); callers should treat |
| 108 | // that as "nothing to send" and skip the truncation. |
| 109 | func FirstIndexInBudget(messages []chat.Message, contextLimit int64) int { |
| 110 | var tokens int64 |
| 111 | lastValidMessageSeen := len(messages) |
| 112 | for i := range slices.Backward(messages) { |
| 113 | tokens += EstimateMessageTokens(&messages[i]) |
| 114 | if tokens > contextLimit { |
| 115 | return lastValidMessageSeen |
| 116 | } |
| 117 | role := messages[i].Role |
| 118 | if role == chat.MessageRoleUser || role == chat.MessageRoleAssistant { |
| 119 | lastValidMessageSeen = i |
| 120 | } |
| 121 | } |
| 122 | return lastValidMessageSeen |
| 123 | } |