| 1626 | pass |
| 1627 | |
def show_exhausted_error(self):
    """Report that the main model hit a token limit, with approximate usage.

    Counts the tokens of any partial response and of the formatted chat
    messages, compares them with the ``max_output_tokens`` and
    ``max_input_tokens`` entries of ``self.main_model.info``, and builds a
    multi-line message. The message is passed to ``self.io.tool_error`` and
    ``self.io.offer_url`` is then called with ``urls.token_limits``.
    """
    model = self.main_model

    # Output side: only count tokens when a partial response was captured.
    partial = self.partial_response_content
    output_tokens = model.token_count(partial) if partial else 0
    # NOTE: a missing or falsy limit falls back to 0, so the ">=" checks
    # below succeed for any non-negative count.
    max_output_tokens = model.info.get("max_output_tokens") or 0

    # Input side: everything that would be sent to the model.
    input_tokens = model.token_count(self.format_messages().all_messages())
    max_input_tokens = model.info.get("max_input_tokens") or 0

    total_tokens = input_tokens + output_tokens

    # Counts are approximate, so a limit is flagged once 70% of it is reached.
    fudge = 0.7

    def flag(count, limit, note):
        """Return ``note`` when ``count`` reaches the fudged ``limit``, else ""."""
        return note if count >= limit * fudge else ""

    context_note = " -- possibly exhausted context window!"
    out_err = flag(output_tokens, max_output_tokens, " -- possibly exceeded output limit!")
    inp_err = flag(input_tokens, max_input_tokens, context_note)
    # The total is compared against the input limit as well.
    tot_err = flag(total_tokens, max_input_tokens, context_note)

    lines = [
        "",
        "",
        f"Model {model.name} has hit a token limit!",
        "Token counts below are approximate.",
        "",
        f"Input tokens: ~{input_tokens:,} of {max_input_tokens:,}{inp_err}",
        f"Output tokens: ~{output_tokens:,} of {max_output_tokens:,}{out_err}",
        f"Total tokens: ~{total_tokens:,} of {max_input_tokens:,}{tot_err}",
    ]

    # The tips use the raw limits (no fudge factor), unlike the notes above.
    if output_tokens >= max_output_tokens:
        lines += [
            "",
            "To reduce output tokens:",
            "- Ask for smaller changes in each request.",
            "- Break your code into smaller source files.",
        ]
        if "diff" not in model.edit_format:
            lines.append("- Use a stronger model that can return diffs.")

    if input_tokens >= max_input_tokens or total_tokens >= max_input_tokens:
        lines += [
            "",
            "To reduce input tokens:",
            "- Use /tokens to see token usage.",
            "- Use /drop to remove unneeded files from the chat session.",
            "- Use /clear to clear the chat history.",
            "- Break your code into smaller source files.",
        ]

    # Every line, including the last, is newline-terminated.
    message = "".join(f"{line}\n" for line in lines)
    self.io.tool_error(message)
    self.io.offer_url(urls.token_limits)
| 1680 | |
| 1681 | def lint_edited(self, fnames): |
| 1682 | res = "" |