improved token limit err msgs and docs #678

This commit is contained in:
Paul Gauthier 2024-06-14 07:28:33 -07:00
parent 0fc6b9beaa
commit a30e656304
3 changed files with 150 additions and 7 deletions

View file

@@ -795,8 +795,11 @@ class Coder:
         except ExhaustedContextWindow:
             exhausted = True
         except litellm.exceptions.BadRequestError as err:
-            self.io.tool_error(f"BadRequestError: {err}")
-            return
+            if "ContextWindowExceededError" in err.message:
+                exhausted = True
+            else:
+                self.io.tool_error(f"BadRequestError: {err}")
+                return
         except openai.BadRequestError as err:
             if "maximum context length" in str(err):
                 exhausted = True
@@ -804,12 +807,8 @@ class Coder:
                 raise err

         if exhausted:
+            self.show_exhausted_error()
             self.num_exhausted_context_windows += 1
-            self.io.tool_error("The chat session is larger than the context window!\n")
-            self.commands.cmd_tokens("")
-            self.io.tool_error("\nTo reduce token usage:")
-            self.io.tool_error(" - Use /drop to remove unneeded files from the chat session.")
-            self.io.tool_error(" - Use /clear to clear chat history.")
             return

         if self.partial_response_function_call:
@@ -878,6 +877,63 @@ class Coder:
             else:
                 self.reflected_message = add_rel_files_message

+    def show_exhausted_error(self):
+        output_tokens = 0
+        if self.partial_response_content:
+            output_tokens = self.main_model.token_count(self.partial_response_content)
+        max_output_tokens = self.main_model.info.get("max_output_tokens", 0)
+
+        input_tokens = self.main_model.token_count(self.format_messages())
+        max_input_tokens = self.main_model.info.get("max_input_tokens", 0)
+
+        total_tokens = input_tokens + output_tokens
+
+        if output_tokens >= max_output_tokens:
+            out_err = " -- exceeded output limit!"
+        else:
+            out_err = ""
+
+        if input_tokens >= max_input_tokens:
+            inp_err = " -- context window exhausted!"
+        else:
+            inp_err = ""
+
+        if total_tokens >= max_input_tokens:
+            tot_err = " -- context window exhausted!"
+        else:
+            tot_err = ""
+
+        res = ["", ""]
+        res.append(f"Model {self.main_model.name} has hit a token limit!")
+        res.append("")
+        res.append(f"Input tokens: {input_tokens} of {max_input_tokens}{inp_err}")
+        res.append(f"Output tokens: {output_tokens} of {max_output_tokens}{out_err}")
+        res.append(f"Total tokens: {total_tokens} of {max_input_tokens}{tot_err}")
+
+        if output_tokens >= max_output_tokens:
+            res.append("")
+            res.append("To reduce output tokens:")
+            res.append("- Ask for smaller changes in each request.")
+            res.append("- Break your code into smaller source files.")
+            if "diff" not in self.main_model.edit_format:
+                res.append(
+                    "- Try using a stronger model like gpt-4o or opus that can return diffs."
+                )
+
+        if input_tokens >= max_input_tokens or total_tokens >= max_input_tokens:
+            res.append("")
+            res.append("To reduce input tokens:")
+            res.append("- Use /tokens to see token usage.")
+            res.append("- Use /drop to remove unneeded files from the chat session.")
+            res.append("- Use /clear to clear the chat history.")
+            res.append("- Break your code into smaller source files.")
+
+        res.append("")
+        res.append(f"For more info: {urls.token_limits}")
+
+        res = "".join([line + "\n" for line in res])
+        self.io.tool_error(res)
+
     def lint_edited(self, fnames):
         res = ""
         for fname in fnames:

View file

@@ -5,3 +5,4 @@ git = "https://aider.chat/docs/git.html"
 enable_playwrite = "https://aider.chat/docs/install/optional.html#enable-playwright"
 favicon = "https://aider.chat/assets/icons/favicon-32x32.png"
 model_warnings = "https://aider.chat/docs/llms/warnings.html"
+token_limits = "https://aider.chat/docs/toubleshooting/token-limits.html"

View file

@@ -0,0 +1,86 @@
---
parent: Troubleshooting
nav_order: 25
---

# Token limits

Every LLM has limits on how many tokens it can process:

- The model's **context window** limits how many tokens of
  *input and output* it can process.
- Each model has a limit on how many **output tokens** it can
  produce.

Aider will report an error if a model indicates that
it has exceeded a token limit.
The error includes suggested actions to help
avoid hitting token limits.

Here's an example error:

```
Model gpt-3.5-turbo has hit a token limit!

Input tokens: 768 of 16385
Output tokens: 4096 of 4096 -- exceeded output limit!
Total tokens: 4864 of 16385

To reduce output tokens:
- Ask for smaller changes in each request.
- Break your code into smaller source files.
- Try using a stronger model like gpt-4o or opus that can return diffs.

For more info: https://aider.chat/docs/token-limits.html
```
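
The thresholds behind that report are simple comparisons of each token count
against the model's limits.
Here's a minimal sketch of those checks (not aider's actual code); the numbers
are the illustrative values from the example error above:

```
def check_limits(input_tokens, output_tokens, max_input_tokens, max_output_tokens):
    problems = []
    # The output alone can exceed the model's output cap...
    if output_tokens >= max_output_tokens:
        problems.append("exceeded output limit")
    # ...and the context window has to hold the input *plus* the output.
    if input_tokens >= max_input_tokens:
        problems.append("input: context window exhausted")
    if input_tokens + output_tokens >= max_input_tokens:
        problems.append("total: context window exhausted")
    return problems

print(check_limits(input_tokens=768, output_tokens=4096,
                   max_input_tokens=16385, max_output_tokens=4096))
# -> ['exceeded output limit']
```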

## Input tokens & context window size

The most common problem is trying to send too much data to a model,
overflowing its context window.
Technically you can exhaust the context window if the input alone is
too large, or if the input plus output together are too large.

Strong models like GPT-4o and Opus have quite
large context windows, so this sort of error is
typically only an issue when working with weaker models.

The easiest solution is to reduce the input tokens
by removing files from the chat.
It's best to add only the files that aider will need to *edit*
to complete your request.

- Use `/tokens` to see token usage.
- Use `/drop` to remove unneeded files from the chat session.
- Use `/clear` to clear the chat history.
- Break your code into smaller source files.
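
If you want a rough, standalone estimate of which files contribute the most
input tokens, a sketch like the one below can help.
It assumes the `tiktoken` package and an OpenAI-style tokenizer, and the file
names are hypothetical; aider's own `/tokens` report is the authoritative number.

```
import tiktoken

# Approximate per-file token counts with an OpenAI-style tokenizer.
# Other providers tokenize differently, so treat these as estimates.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

for fname in ["main.py", "utils.py"]:  # hypothetical file names
    with open(fname) as f:
        text = f.read()
    print(f"{fname}: ~{len(enc.encode(text))} tokens")
```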

## Output token limits

Most models have quite small output limits, often as low
as 4k tokens.
If you ask aider to make a large change that affects a lot
of code, the LLM may hit output token limits
as it tries to send back all the changes.

To avoid hitting output token limits:

- Ask for smaller changes in each request.
- Break your code into smaller source files.
- Try using a stronger model like gpt-4o or opus that can return diffs.
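
As a rough, illustrative calculation (assumed averages, not measurements):
a 1,000-line source file is on the order of 10k tokens, so a model asked to
return the entire edited file would blow well past a 4k output cap, while a
diff touching a few dozen lines usually needs only a few hundred output tokens.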

## Other causes

Sometimes token limit errors are caused by
non-compliant API proxy servers
or bugs in the API server you are using to host a local model.

Aider has been well tested when directly connecting to major
[LLM provider cloud APIs](https://aider.chat/docs/llms.html).
For serving local models,
[Ollama](https://aider.chat/docs/llms/ollama.html) is known to work well with aider.

Try using aider without an API proxy server,
or directly with one of the recommended cloud APIs,
and see if your token limit problems resolve.