From 6517cb15ef3b53204d9598d0d66c06e608bfbab6 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 4 Feb 2025 12:47:33 -0800 Subject: [PATCH] copy --- aider/website/docs/llms/ollama.md | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/aider/website/docs/llms/ollama.md b/aider/website/docs/llms/ollama.md index 5207656f5..771b3022c 100644 --- a/aider/website/docs/llms/ollama.md +++ b/aider/website/docs/llms/ollama.md @@ -45,18 +45,10 @@ setx OLLAMA_API_KEY # Windows, restart shell after setx [Ollama uses a 2k context window by default](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-specify-the-context-window-size), which is very small for working with aider. -Unlike most other LLM servers, Ollama does not throw an error if you submit -a request that exceeds the context window. -Instead, it just silently truncates the request by discarding the "oldest" messages -in the chat to make it fit within the context window. +By default, aider sets Ollama's context window +to be large enough for each request you send plus 8k tokens for the reply. -So if your context window is too small, you won't get an explicit error. -The biggest symptom will be that aider says it can't see (some of) the files -you added to the chat. -That's because ollama is silently discarding them because they exceed the context window. - -Aider sets Ollama's context window to 8k by default. -Larger context windows will allow you to work with larger amounts of code, +Larger context windows may allow larger replies from the LLM but will use memory and increase latency. If you would like a larger context window @@ -67,6 +59,6 @@ like this: ``` - name: ollama/qwen2.5-coder:32b-instruct-fp16 extra_params: - num_ctx: 8192 + num_ctx: 65536 ```