diff --git a/aider/models.py b/aider/models.py
index 0bca766c6..49b324bd5 100644
--- a/aider/models.py
+++ b/aider/models.py
@@ -938,7 +938,7 @@ class Model(ModelSettings):
             self.edit_format = "diff"
             self.editor_edit_format = "editor-diff"
             self.use_repo_map = True
-            if "ollama" in model:
+            if model.startswith("ollama/") or model.startswith("ollama_chat/"):
                 self.extra_params = dict(num_ctx=8 * 1024)
             return  # <--
 
diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md
index 5db7366d3..d3712eaca 100644
--- a/aider/website/_posts/2024-11-21-quantization.md
+++ b/aider/website/_posts/2024-11-21-quantization.md
@@ -116,19 +116,12 @@
 in the chat to make it fit within the context window.
 All of the Ollama results above were collected with at least an 8k context window, which
 is large enough to attempt all the coding problems in the benchmark.
+Aider sets Ollama's context window to 8k by default.
 
-You can set the Ollama server's context window with a
+You can change the Ollama server's context window with a
 [`.aider.model.settings.yml` file](https://aider.chat/docs/config/adv-model-settings.html#model-settings)
 like this:
 
-```
-- name: aider/extra_params
-  extra_params:
-    num_ctx: 8192
-```
-
-That uses the special model name `aider/extra_params` to set it for *all* models. You should probably use a specific model name like:
-
 ```
 - name: ollama/qwen2.5-coder:32b-instruct-fp16
   extra_params:
diff --git a/aider/website/docs/llms/ollama.md b/aider/website/docs/llms/ollama.md
index 328600b33..9043c3ac1 100644
--- a/aider/website/docs/llms/ollama.md
+++ b/aider/website/docs/llms/ollama.md
@@ -44,28 +44,25 @@ setx   OLLAMA_API_KEY <api-key> # Windows, restart shell after setx
 
 [Ollama uses a 2k context window by default](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-specify-the-context-window-size),
 which is very small for working with aider.
 
-Unlike most other LLM servers, Ollama does not throw an error if you submit
-a request that exceeds the context window.
-Instead, it just silently truncates the request by discarding the "oldest" messages
-in the chat to make it fit within the context window.
-All of the Ollama results above were collected with at least an 8k context window, which
-is large enough to attempt all the coding problems in the benchmark.
-
-You can set the Ollama server's context window with a
+Aider sets Ollama's context window to 8k by default.
+If you would like
+a larger context window
+you can use a
 [`.aider.model.settings.yml` file](https://aider.chat/docs/config/adv-model-settings.html#model-settings)
 like this:
 
-```
-- name: aider/extra_params
-  extra_params:
-    num_ctx: 8192
-```
-
-That uses the special model name `aider/extra_params` to set it for *all* models. You should probably use a specific model name like:
-
 ```
 - name: ollama/qwen2.5-coder:32b-instruct-fp16
   extra_params:
     num_ctx: 8192
 ```
+
+Unlike most other LLM servers, Ollama does not throw an error if you submit
+a request that exceeds the context window.
+Instead, it just silently truncates the request by discarding the "oldest" messages
+in the chat to make it fit within the context window.
+So if your context window is too small, you won't get an error.
+Aider will probably just fail to work well and experience
+a lot of
+[file editing problems](https://aider.chat/docs/troubleshooting/edit-errors.html).
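A note on the `aider/models.py` change: the old substring test `"ollama" in model` would apply the 8k `num_ctx` default to any model name that merely contains "ollama", regardless of where it is served from, while the new prefix test matches only models actually routed through the `ollama/` or `ollama_chat/` providers. Here is a minimal sketch of the difference; the model names below are hypothetical illustrations, not names from the aider codebase:

```python
# Sketch of the matching change in aider/models.py.
def old_check(model: str) -> bool:
    return "ollama" in model  # substring test: overly broad

def new_check(model: str) -> bool:
    return model.startswith("ollama/") or model.startswith("ollama_chat/")

for name in [
    "ollama/qwen2.5-coder:32b-instruct-fp16",  # served by Ollama: both match
    "ollama_chat/llama3.1",                    # served by Ollama: both match
    "openrouter/acme-ollama-tune",             # hypothetical: only the old check matches
]:
    print(f"{name!r}: old={old_check(name)} new={new_check(name)}")
```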
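For context on what `num_ctx` controls: it is the context window size, in tokens, that the Ollama server allocates for a request. The sketch below passes it to Ollama's native `/api/chat` endpoint directly; this is illustrative only, not how aider itself issues requests, and it assumes a local Ollama server on the default port with the named model already pulled.

```python
# Illustrative only: set num_ctx on a direct request to a local Ollama server.
import requests

resp = requests.post(
    "http://localhost:11434/api/chat",  # default local Ollama address
    json={
        "model": "qwen2.5-coder:32b-instruct-fp16",
        "messages": [{"role": "user", "content": "Say hello."}],
        "options": {"num_ctx": 8192},  # context window size, in tokens
        "stream": False,
    },
    timeout=600,
)
print(resp.json()["message"]["content"])
```

If a request exceeds `num_ctx`, Ollama silently drops the oldest messages rather than returning an error, which is exactly the failure mode the `ollama.md` change documents.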