From dbd7f51f5c1cd6a5a22072dd2bdcffe0eb65b9ef Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Fri, 22 Nov 2024 05:56:03 -0800 Subject: [PATCH] fix ollama models included in quant blog --- aider/website/_data/quant.yml | 71 +++++++++---------- .../website/_posts/2024-11-21-quantization.md | 14 ++-- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml index 55538b63c..536ae25af 100644 --- a/aider/website/_data/quant.yml +++ b/aider/website/_data/quant.yml @@ -45,49 +45,48 @@ seconds_per_case: 40.7 total_cost: 0.1497 -- dirname: 2024-09-20-21-47-17--qwen2.5-32b-instruct-q8_0-whole +- dirname: 2024-11-21-23-33-47--ollama-qwen25-coder test_cases: 133 - model: ollama/qwen2.5:32b-instruct-q8_0 - edit_format: whole - commit_hash: 2753ac6 - pass_rate_1: 46.6 - pass_rate_2: 58.6 - percent_cases_well_formed: 100.0 - error_outputs: 0 - num_malformed_responses: 0 - num_with_malformed_responses: 0 - user_asks: 1 + model: qwen2.5-coder:32b-instruct-q4_K_M + edit_format: diff + commit_hash: 488c88d-dirty + pass_rate_1: 44.4 + pass_rate_2: 53.4 + percent_cases_well_formed: 44.4 + error_outputs: 231 + num_malformed_responses: 183 + num_with_malformed_responses: 74 + user_asks: 79 lazy_comments: 0 - syntax_errors: 0 + syntax_errors: 2 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 2 - command: aider --model ollama/qwen2.5:32b-instruct-q8_0 - date: 2024-09-20 - versions: 0.56.1.dev - seconds_per_case: 1763.7 + command: aider --model ollama/qwen2.5-coder:32b-instruct-q4_K_M + date: 2024-11-21 + versions: 0.64.2.dev + seconds_per_case: 86.7 total_cost: 0.0000 -- dirname: 2024-09-30-14-09-43--qwen2.5-32b-whole-2 +- dirname: 2024-11-22-03-33-30--ollama-qwen25-coder-krith-instruct test_cases: 133 - model: ollama/qwen2.5:32b - edit_format: whole - commit_hash: 765c4cb - pass_rate_1: 44.4 - pass_rate_2: 54.1 - percent_cases_well_formed: 100.0 - error_outputs: 0 - num_malformed_responses: 0 - 
num_with_malformed_responses: 0 - user_asks: 9 + model: ollama/krith/qwen2.5-coder-32b-instruct:IQ2_M + edit_format: diff + commit_hash: fbadfcf-dirty + pass_rate_1: 16.5 + pass_rate_2: 21.1 + percent_cases_well_formed: 60.9 + error_outputs: 1169 + num_malformed_responses: 148 + num_with_malformed_responses: 52 + user_asks: 58 lazy_comments: 0 - syntax_errors: 0 - indentation_errors: 0 + syntax_errors: 3 + indentation_errors: 1 exhausted_context_windows: 0 - test_timeouts: 3 - command: aider --model ollama/qwen2.5:32b - date: 2024-09-30 - versions: 0.58.1.dev - seconds_per_case: 134.9 - total_cost: 0.0000 - \ No newline at end of file + test_timeouts: 4 + command: aider --model ollama/krith/qwen2.5-coder-32b-instruct:IQ2_M + date: 2024-11-22 + versions: 0.64.2.dev + seconds_per_case: 169.7 + total_cost: 0.00 \ No newline at end of file diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md index ff3ff58b0..42e2831a4 100644 --- a/aider/website/_posts/2024-11-21-quantization.md +++ b/aider/website/_posts/2024-11-21-quantization.md @@ -24,16 +24,16 @@ and local model servers like Ollama. {% include quant-chart.js %} -The graph above compares 4 different versions of the Qwen 2.5 32B model, +The graph above compares 3 different versions of the Qwen 2.5 Coder 32B model, served both locally and from cloud providers. - The [HuggingFace weights](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) served via [glhf.chat](https://glhf.chat). - The results from [OpenRouter's mix of providers](https://openrouter.ai/qwen/qwen-2.5-coder-32b-instruct/providers) which serve the model with different levels of quantization. -- Two Ollama models run locally. - -The best version of the model rivals GPT-4o, while the worst performer -is more like GPT-3.5 Turbo. +- Ollama locally serving [qwen2.5-coder:32b-instruct-q4_K_M](https://ollama.com/library/qwen2.5-coder:32b-instruct-q4_K_M), which has `Q4_K_M` quantization. 
+- Ollama locally serving [krith/qwen2.5-coder-32b-instruct:IQ2_M](https://ollama.com/krith/qwen2.5-coder-32b-instruct), which has `IQ2_M` quantization. +The best version of the model rivals GPT-4o, while the worst performers +range from GPT-3.5 Turbo level to completely useless. ## Choosing providers with OpenRouter OpenRouter allows you to ignore specific providers in your [preferences](https://openrouter.ai/settings/preferences). This can be effective to exclude highly quantized or otherwise undesirable providers. + +{: .note } +The original version of this article included incorrect Ollama models +that were not Qwen 2.5 Coder 32B.