From a30e656304250206d14c41f99d34740e166768b1 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Fri, 14 Jun 2024 07:28:33 -0700
Subject: [PATCH] improved token limit err msgs and docs #678

---
 aider/coders/base_coder.py                   | 70 ++++++++++++++--
 aider/urls.py                                |  1 +
 website/docs/troubleshooting/token-limits.md | 86 ++++++++++++++++++++
 3 files changed, 150 insertions(+), 7 deletions(-)
 create mode 100644 website/docs/troubleshooting/token-limits.md

diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index 16dc55c78..b704841fa 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -795,8 +795,11 @@ class Coder:
         except ExhaustedContextWindow:
             exhausted = True
         except litellm.exceptions.BadRequestError as err:
-            self.io.tool_error(f"BadRequestError: {err}")
-            return
+            if "ContextWindowExceededError" in err.message:
+                exhausted = True
+            else:
+                self.io.tool_error(f"BadRequestError: {err}")
+                return
         except openai.BadRequestError as err:
             if "maximum context length" in str(err):
                 exhausted = True
@@ -804,12 +807,8 @@ class Coder:
                 raise err

         if exhausted:
+            self.show_exhausted_error()
             self.num_exhausted_context_windows += 1
-            self.io.tool_error("The chat session is larger than the context window!\n")
-            self.commands.cmd_tokens("")
-            self.io.tool_error("\nTo reduce token usage:")
-            self.io.tool_error(" - Use /drop to remove unneeded files from the chat session.")
-            self.io.tool_error(" - Use /clear to clear chat history.")
             return

         if self.partial_response_function_call:
@@ -878,6 +877,63 @@ class Coder:
         else:
             self.reflected_message = add_rel_files_message

+    def show_exhausted_error(self):
+        output_tokens = 0
+        if self.partial_response_content:
+            output_tokens = self.main_model.token_count(self.partial_response_content)
+        max_output_tokens = self.main_model.info.get("max_output_tokens", 0)
+
+        input_tokens = self.main_model.token_count(self.format_messages())
+        max_input_tokens = self.main_model.info.get("max_input_tokens", 0)
+
+        total_tokens = input_tokens + output_tokens
+
+        if max_output_tokens and output_tokens >= max_output_tokens:
+            out_err = " -- exceeded output limit!"
+        else:
+            out_err = ""
+
+        if max_input_tokens and input_tokens >= max_input_tokens:
+            inp_err = " -- context window exhausted!"
+        else:
+            inp_err = ""
+
+        if max_input_tokens and total_tokens >= max_input_tokens:
+            tot_err = " -- context window exhausted!"
+        else:
+            tot_err = ""
+
+        res = ["", ""]
+        res.append(f"Model {self.main_model.name} has hit a token limit!")
+        res.append("")
+        res.append(f"Input tokens: {input_tokens} of {max_input_tokens}{inp_err}")
+        res.append(f"Output tokens: {output_tokens} of {max_output_tokens}{out_err}")
+        res.append(f"Total tokens: {total_tokens} of {max_input_tokens}{tot_err}")
+
+        if max_output_tokens and output_tokens >= max_output_tokens:
+            res.append("")
+            res.append("To reduce output tokens:")
+            res.append("- Ask for smaller changes in each request.")
+            res.append("- Break your code into smaller source files.")
+            if "diff" not in self.main_model.edit_format:
+                res.append(
+                    "- Try using a stronger model like gpt-4o or opus that can return diffs."
+                )
+
+        if max_input_tokens and (input_tokens >= max_input_tokens or total_tokens >= max_input_tokens):
+            res.append("")
+            res.append("To reduce input tokens:")
+            res.append("- Use /tokens to see token usage.")
+            res.append("- Use /drop to remove unneeded files from the chat session.")
+            res.append("- Use /clear to clear the chat history.")
+            res.append("- Break your code into smaller source files.")
+
+        res.append("")
+        res.append(f"For more info: {urls.token_limits}")
+
+        res = "".join([line + "\n" for line in res])
+        self.io.tool_error(res)
+
     def lint_edited(self, fnames):
         res = ""
         for fname in fnames:
diff --git a/aider/urls.py b/aider/urls.py
index 62f95503d..5e2c7761e 100644
--- a/aider/urls.py
+++ b/aider/urls.py
@@ -5,3 +5,4 @@ git = "https://aider.chat/docs/git.html"
 enable_playwrite = "https://aider.chat/docs/install/optional.html#enable-playwright"
 favicon = "https://aider.chat/assets/icons/favicon-32x32.png"
 model_warnings = "https://aider.chat/docs/llms/warnings.html"
+token_limits = "https://aider.chat/docs/troubleshooting/token-limits.html"
diff --git a/website/docs/troubleshooting/token-limits.md b/website/docs/troubleshooting/token-limits.md
new file mode 100644
index 000000000..ec4bb632e
--- /dev/null
+++ b/website/docs/troubleshooting/token-limits.md
@@ -0,0 +1,86 @@
+---
+parent: Troubleshooting
+nav_order: 25
+---
+
+# Token limits
+
+Every LLM has limits on how many tokens it can process:
+
+- The model's **context window** limits how many tokens of
+*input and output* it can process.
+- Each model has a limit on how many **output tokens** it can
+produce.
+
+Aider will report an error if a model responds indicating that
+it has exceeded a token limit.
+The error will include suggested actions to help
+avoid hitting token limits.
+Here's an example error:
+
+```
+Model gpt-3.5-turbo has hit a token limit!
+
+Input tokens: 768 of 16385
+Output tokens: 4096 of 4096 -- exceeded output limit!
+Total tokens: 4864 of 16385
+
+To reduce output tokens:
+- Ask for smaller changes in each request.
+- Break your code into smaller source files.
+- Try using a stronger model like gpt-4o or opus that can return diffs.
+
+For more info: https://aider.chat/docs/troubleshooting/token-limits.html
+```
+
+## Input tokens & context window size
+
+The most common problem is trying to send too much data to a
+model,
+overflowing its context window.
+Technically you can exhaust the context window if the input is
+too large or if the combined input and output are too large.
+
+Strong models like GPT-4o and Opus have quite
+large context windows, so this sort of error is
+typically only an issue when working with weaker models.
+
+The easiest solution is to reduce the input tokens
+by removing files from the chat.
+It's best to only add the files that aider will need to *edit*
+to complete your request.
+
+- Use `/tokens` to see token usage.
+- Use `/drop` to remove unneeded files from the chat session.
+- Use `/clear` to clear the chat history.
+- Break your code into smaller source files.
+
+## Output token limits
+
+Most models have quite small output limits, often as low
+as 4k tokens.
+If you ask aider to make a large change that affects a lot
+of code, the LLM may hit output token limits
+as it tries to send back all the changes.
+
+To avoid hitting output token limits:
+
+- Ask for smaller changes in each request.
+- Break your code into smaller source files.
+- Try using a stronger model like gpt-4o or opus that can return diffs.
+
+
+## Other causes
+
+Sometimes token limit errors are caused by
+non-compliant API proxy servers
+or bugs in the API server you are using to host a local model.
+Aider has been well tested when directly connecting to
+major
+[LLM provider cloud APIs](https://aider.chat/docs/llms.html).
+For serving local models,
+[Ollama](https://aider.chat/docs/llms/ollama.html) is known to work well with aider.
+
+Try using aider without an API proxy server,
+or directly with one of the recommended cloud APIs,
+and see if your token limit problems resolve.
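
A note on the first `base_coder.py` hunk: litellm's `BadRequestError` can signal several unrelated failures, so the patch decides whether the chat is recoverable by looking for the `ContextWindowExceededError` name inside the exception's message. Below is a minimal, dependency-free sketch of that classification; `ProviderError` is a hypothetical stand-in for `litellm.exceptions.BadRequestError`, not aider's actual class.

```python
# Sketch of the error classification added in the first hunk.
# ProviderError is a hypothetical stand-in for litellm.exceptions.BadRequestError.
class ProviderError(Exception):
    def __init__(self, message: str):
        super().__init__(message)
        self.message = message


def is_context_window_error(err: ProviderError) -> bool:
    # The patch matches on the error name embedded in the message text,
    # not on the exception type, so wrapped provider errors are caught too.
    return "ContextWindowExceededError" in err.message


assert is_context_window_error(ProviderError("litellm.ContextWindowExceededError: ..."))
assert not is_context_window_error(ProviderError("invalid request"))
```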
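
For the reporting side, here is a hedged, self-contained sketch of the message that `show_exhausted_error()` assembles. The `model_info` dict and the hard-coded token counts are stand-ins for aider's `main_model.info` and `main_model.token_count()`; the numbers reproduce the gpt-3.5-turbo example from the new docs page.

```python
# Minimal sketch of the report built by show_exhausted_error().
# model_info stands in for aider's main_model.info metadata dict,
# and the token counts below stand in for main_model.token_count().
def token_limit_report(input_tokens: int, output_tokens: int, model_info: dict) -> str:
    max_input = model_info.get("max_input_tokens", 0)
    max_output = model_info.get("max_output_tokens", 0)
    total = input_tokens + output_tokens

    # Guard on each limit so unknown metadata (default 0) never flags an error.
    inp_err = " -- context window exhausted!" if max_input and input_tokens >= max_input else ""
    out_err = " -- exceeded output limit!" if max_output and output_tokens >= max_output else ""
    tot_err = " -- context window exhausted!" if max_input and total >= max_input else ""

    return "\n".join([
        f"Model {model_info.get('name', 'unknown')} has hit a token limit!",
        "",
        f"Input tokens: {input_tokens} of {max_input}{inp_err}",
        f"Output tokens: {output_tokens} of {max_output}{out_err}",
        f"Total tokens: {total} of {max_input}{tot_err}",
    ])


# Reproduces the docs example: a 16385-token context window and a 4096-token output cap.
info = {"name": "gpt-3.5-turbo", "max_input_tokens": 16385, "max_output_tokens": 4096}
print(token_limit_report(768, 4096, info))
```

Running the snippet prints the same report shown in the sample error in `token-limits.md` above.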