diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml
index 0d96c54b6..cb5906334 100644
--- a/aider/website/_data/quant.yml
+++ b/aider/website/_data/quant.yml
@@ -24,7 +24,7 @@
- dirname: 2024-11-22-18-56-13--ollama-qwen2.5-coder:32b-instruct-fp16
test_cases: 132
- model: ollama/qwen2.5-coder:32b-instruct-fp16 (64k context)
+ model: Ollama fp16
edit_format: diff
commit_hash: f06452c-dirty, 6a0a97c-dirty, 4e9ae16-dirty, 5506d0f-dirty
pass_rate_1: 58.3
@@ -70,7 +70,7 @@
- dirname: 2024-11-22-17-53-35--qwen25-coder-32b-Instruct-4bit
test_cases: 133
- model: mlx-community/Qwen2.5-Coder-32B-Instruct-4bit
+ model: mlx-community 4bit
edit_format: diff
commit_hash: a16dcab-dirty
pass_rate_1: 60.2
@@ -93,7 +93,7 @@
- dirname: 2024-11-23-15-07-20--qwen25-coder-32b-Instruct-8bit
test_cases: 133
- model: mlx-community/Qwen2.5-Coder-32B-Instruct-8bit
+ model: mlx-community 8bit
edit_format: diff
commit_hash: a16dcab-dirty
pass_rate_1: 59.4
@@ -137,26 +137,25 @@
seconds_per_case: 40.7
total_cost: 0.1497
-- dirname: 2024-11-21-23-33-47--ollama-qwen25-coder
+- dirname: 2024-11-23-21-08-53--ollama-qwen2.5-coder:32b-instruct-q4_K_M-8kctx
test_cases: 133
- model: Ollama Q4_K_M
+ model: Ollama q4_K_M
edit_format: diff
- commit_hash: 488c88d-dirty
- pass_rate_1: 44.4
- pass_rate_2: 53.4
- percent_cases_well_formed: 44.4
- error_outputs: 231
- num_malformed_responses: 183
- num_with_malformed_responses: 74
- user_asks: 79
+ commit_hash: baa1335-dirty, e63df83-dirty, ff8c1aa-dirty
+ pass_rate_1: 54.9
+ pass_rate_2: 66.9
+ percent_cases_well_formed: 94.0
+ error_outputs: 21
+ num_malformed_responses: 21
+ num_with_malformed_responses: 8
+ user_asks: 5
lazy_comments: 0
- syntax_errors: 2
+ syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
- test_timeouts: 2
+ test_timeouts: 3
command: aider --model ollama/qwen2.5-coder:32b-instruct-q4_K_M
- date: 2024-11-21
+ date: 2024-11-23
versions: 0.64.2.dev
- seconds_per_case: 86.7
- total_cost: 0.0000
-
+ seconds_per_case: 35.7
+ total_cost: 0.0000
\ No newline at end of file
diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md
index 3fff8dd62..3d9372194 100644
--- a/aider/website/_posts/2024-11-21-quantization.md
+++ b/aider/website/_posts/2024-11-21-quantization.md
@@ -18,7 +18,7 @@ can strongly impact code editing skill.
Heavily quantized models are often used by cloud API providers
and local model servers like Ollama or MLX.
-
+
-## Setting the context window size
+## Setting Ollama's context window size
[Ollama uses a 2k context window by default](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-specify-the-context-window-size),
which is very small for working with aider.
+All of the Ollama results above were collected with at least an 8k context window, which
+is large enough to attempt all the coding problems in the benchmark.
+
You can set the Ollama server's context window with a
[`.aider.model.settings.yml` file](https://aider.chat/docs/config/adv-model-settings.html#model-settings)
like this:
@@ -112,7 +115,7 @@ like this:
```
- name: aider/extra_params
extra_params:
- num_ctx: 65536
+ num_ctx: 8192
```
That uses the special model name `aider/extra_params` to set it for *all* models. You should probably use a specific model name like:
@@ -120,7 +123,7 @@ That uses the special model name `aider/extra_params` to set it for *all* models
```
- name: ollama/qwen2.5-coder:32b-instruct-fp16
extra_params:
- num_ctx: 65536
+ num_ctx: 8192
```
## Choosing providers with OpenRouter
@@ -130,3 +133,8 @@ OpenRouter allows you to ignore specific providers in your
This can be effective to exclude highly quantized or otherwise
undesirable providers.
+
+{: .note }
+Earlier versions of this article included incorrect Ollama models,
+and also included some Ollama results with the too-small default 2k
+context window.