mirror of https://github.com/Aider-AI/aider.git, synced 2025-05-24 14:25:00 +00:00
improved token limit err msgs and docs #678
parent 0fc6b9beaa
commit a30e656304
3 changed files with 150 additions and 7 deletions

@@ -795,8 +795,11 @@ class Coder:
         except ExhaustedContextWindow:
             exhausted = True
         except litellm.exceptions.BadRequestError as err:
-            self.io.tool_error(f"BadRequestError: {err}")
-            return
+            if "ContextWindowExceededError" in err.message:
+                exhausted = True
+            else:
+                self.io.tool_error(f"BadRequestError: {err}")
+                return
         except openai.BadRequestError as err:
             if "maximum context length" in str(err):
                 exhausted = True
@@ -804,12 +807,8 @@ class Coder:
                 raise err

         if exhausted:
+            self.show_exhausted_error()
             self.num_exhausted_context_windows += 1
-            self.io.tool_error("The chat session is larger than the context window!\n")
-            self.commands.cmd_tokens("")
-            self.io.tool_error("\nTo reduce token usage:")
-            self.io.tool_error(" - Use /drop to remove unneeded files from the chat session.")
-            self.io.tool_error(" - Use /clear to clear chat history.")
             return

         if self.partial_response_function_call:
@@ -878,6 +877,63 @@ class Coder:
             else:
                 self.reflected_message = add_rel_files_message

+    def show_exhausted_error(self):
+        output_tokens = 0
+        if self.partial_response_content:
+            output_tokens = self.main_model.token_count(self.partial_response_content)
+        max_output_tokens = self.main_model.info.get("max_output_tokens", 0)
+
+        input_tokens = self.main_model.token_count(self.format_messages())
+        max_input_tokens = self.main_model.info.get("max_input_tokens", 0)
+
+        total_tokens = input_tokens + output_tokens
+
+        if output_tokens >= max_output_tokens:
+            out_err = " -- exceeded output limit!"
+        else:
+            out_err = ""
+
+        if input_tokens >= max_input_tokens:
+            inp_err = " -- context window exhausted!"
+        else:
+            inp_err = ""
+
+        if total_tokens >= max_input_tokens:
+            tot_err = " -- context window exhausted!"
+        else:
+            tot_err = ""
+
+        res = ["", ""]
+        res.append(f"Model {self.main_model.name} has hit a token limit!")
+        res.append("")
+        res.append(f"Input tokens: {input_tokens} of {max_input_tokens}{inp_err}")
+        res.append(f"Output tokens: {output_tokens} of {max_output_tokens}{out_err}")
+        res.append(f"Total tokens: {total_tokens} of {max_input_tokens}{tot_err}")
+
+        if output_tokens >= max_output_tokens:
+            res.append("")
+            res.append("To reduce output tokens:")
+            res.append("- Ask for smaller changes in each request.")
+            res.append("- Break your code into smaller source files.")
+            if "diff" not in self.main_model.edit_format:
+                res.append(
+                    "- Try using a stronger model like gpt-4o or opus that can return diffs."
+                )
+
+        if input_tokens >= max_input_tokens or total_tokens >= max_input_tokens:
+            res.append("")
+            res.append("To reduce input tokens:")
+            res.append("- Use /tokens to see token usage.")
+            res.append("- Use /drop to remove unneeded files from the chat session.")
+            res.append("- Use /clear to clear the chat history.")
+            res.append("- Break your code into smaller source files.")
+
+        res.append("")
+        res.append(f"For more info: {urls.token_limits}")
+
+        res = "".join([line + "\n" for line in res])
+        self.io.tool_error(res)
+
     def lint_edited(self, fnames):
         res = ""
         for fname in fnames:
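
For reference, here is a minimal standalone sketch of the classification the new exception handlers perform: a provider bad-request error counts as an exhausted context window only when its message carries one of the known markers. This is an illustrative helper only; the commit itself does the check inline.

```python
# Illustrative helper mirroring the checks above: context-window errors
# from litellm and openai are recognized by their error messages;
# anything else should be reported as an ordinary BadRequestError.
import litellm
import openai


def is_context_window_error(err: Exception) -> bool:
    if isinstance(err, litellm.exceptions.BadRequestError):
        return "ContextWindowExceededError" in str(err)
    if isinstance(err, openai.BadRequestError):
        return "maximum context length" in str(err)
    return False
```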

@@ -5,3 +5,4 @@ git = "https://aider.chat/docs/git.html"
 enable_playwrite = "https://aider.chat/docs/install/optional.html#enable-playwright"
 favicon = "https://aider.chat/assets/icons/favicon-32x32.png"
 model_warnings = "https://aider.chat/docs/llms/warnings.html"
+token_limits = "https://aider.chat/docs/troubleshooting/token-limits.html"

website/docs/troubleshooting/token-limits.md (new file, 86 lines)
@@ -0,0 +1,86 @@
---
parent: Troubleshooting
nav_order: 25
---

# Token limits

Every LLM has limits on how many tokens it can process:

- The model's **context window** limits how many tokens of *input and output* it can process.
- Each model has a limit on how many **output tokens** it can produce.
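
If you want to check a particular model's limits yourself, litellm publishes per-model metadata that includes both numbers. A minimal sketch, assuming litellm is installed (some models may not report every field):

```python
# Look up a model's token limits from litellm's bundled model metadata.
# Some models/providers omit fields, so treat missing values as unknown.
import litellm

info = litellm.get_model_info("gpt-3.5-turbo")
print("context window (max input tokens):", info.get("max_input_tokens"))
print("max output tokens:", info.get("max_output_tokens"))
```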

Aider will report an error if a model responds indicating that it has exceeded a token limit.
The error will include suggested actions to help avoid hitting token limits.
Here's an example error:

```
Model gpt-3.5-turbo has hit a token limit!

Input tokens: 768 of 16385
Output tokens: 4096 of 4096 -- exceeded output limit!
Total tokens: 4864 of 16385

To reduce output tokens:
- Ask for smaller changes in each request.
- Break your code into smaller source files.
- Try using a stronger model like gpt-4o or opus that can return diffs.

For more info: https://aider.chat/docs/token-limits.html
```
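
To make the arithmetic behind that example concrete: the output hit its cap exactly, while input plus output still fit inside the context window, so only the output limit was flagged. A tiny illustration using the numbers above, treating the thresholds as inclusive:

```python
# Numbers from the example error above (gpt-3.5-turbo).
context_window = 16385    # limit on input (and total) tokens
max_output_tokens = 4096  # limit on generated tokens

input_tokens = 768
output_tokens = 4096
total_tokens = input_tokens + output_tokens  # 4864

print(output_tokens >= max_output_tokens)  # True:  "exceeded output limit!"
print(input_tokens >= context_window)      # False: input alone fits
print(total_tokens >= context_window)      # False: total still fits the window
```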

## Input tokens & context window size

The most common problem is trying to send too much data to a model, overflowing its context window.
Technically you can exhaust the context window if the input alone is too large,
or if the input plus the output are too large.

Strong models like GPT-4o and Opus have quite large context windows,
so this sort of error is typically only an issue when working with weaker models.

The easiest solution is to reduce the input tokens by removing files from the chat.
It's best to only add the files that aider will need to *edit* to complete your request.
A rough per-file token count, sketched after the list below, can help you decide which files to drop.

- Use `/tokens` to see token usage.
- Use `/drop` to remove unneeded files from the chat session.
- Use `/clear` to clear the chat history.
- Break your code into smaller source files.
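
Here is the rough per-file token count mentioned above. It is only an estimating sketch: it assumes tiktoken is installed and uses an OpenAI-style tokenizer, so counts for other models will be approximate, and the file names are placeholders.

```python
# Rough per-file token estimate using an OpenAI-style tokenizer (tiktoken).
# File names below are placeholders; counts are approximate for other models.
from pathlib import Path

import tiktoken

enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

for fname in ["app.py", "utils.py", "README.md"]:
    text = Path(fname).read_text()
    print(f"{fname}: ~{len(enc.encode(text))} tokens")
```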

## Output token limits

Most models have quite small output limits, often as low as 4k tokens.
If you ask aider to make a large change that affects a lot of code,
the LLM may hit output token limits as it tries to send back all the changes.

To avoid hitting output token limits:

- Ask for smaller changes in each request.
- Break your code into smaller source files.
- Try using a stronger model like gpt-4o or opus that can return diffs.

## Other causes

Sometimes token limit errors are caused by non-compliant API proxy servers,
or by bugs in the API server you are using to host a local model.
Aider has been well tested when connecting directly to the major
[LLM provider cloud APIs](https://aider.chat/docs/llms.html).
For serving local models,
[Ollama](https://aider.chat/docs/llms/ollama.html) is known to work well with aider.

Try using aider without an API proxy server,
or directly with one of the recommended cloud APIs,
and see if your token limit problems resolve.