From dbd7f51f5c1cd6a5a22072dd2bdcffe0eb65b9ef Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Fri, 22 Nov 2024 05:56:03 -0800 Subject: [PATCH] fix ollama models included in quant blog --- aider/website/_data/quant.yml | 71 +++++++++---------- .../website/_posts/2024-11-21-quantization.md | 14 ++-- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml index 55538b63c..536ae25af 100644 --- a/aider/website/_data/quant.yml +++ b/aider/website/_data/quant.yml @@ -45,49 +45,48 @@ seconds_per_case: 40.7 total_cost: 0.1497 -- dirname: 2024-09-20-21-47-17--qwen2.5-32b-instruct-q8_0-whole +- dirname: 2024-11-21-23-33-47--ollama-qwen25-coder test_cases: 133 - model: ollama/qwen2.5:32b-instruct-q8_0 - edit_format: whole - commit_hash: 2753ac6 - pass_rate_1: 46.6 - pass_rate_2: 58.6 - percent_cases_well_formed: 100.0 - error_outputs: 0 - num_malformed_responses: 0 - num_with_malformed_responses: 0 - user_asks: 1 + model: qwen2.5-coder:32b-instruct-q4_K_M + edit_format: diff + commit_hash: 488c88d-dirty + pass_rate_1: 44.4 + pass_rate_2: 53.4 + percent_cases_well_formed: 44.4 + error_outputs: 231 + num_malformed_responses: 183 + num_with_malformed_responses: 74 + user_asks: 79 lazy_comments: 0 - syntax_errors: 0 + syntax_errors: 2 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 2 - command: aider --model ollama/qwen2.5:32b-instruct-q8_0 - date: 2024-09-20 - versions: 0.56.1.dev - seconds_per_case: 1763.7 + command: aider --model ollama/qwen2.5-coder:32b-instruct-q4_K_M + date: 2024-11-21 + versions: 0.64.2.dev + seconds_per_case: 86.7 total_cost: 0.0000 -- dirname: 2024-09-30-14-09-43--qwen2.5-32b-whole-2 +- dirname: 2024-11-22-03-33-30--ollama-qwen25-coder-krith-instruct test_cases: 133 - model: ollama/qwen2.5:32b - edit_format: whole - commit_hash: 765c4cb - pass_rate_1: 44.4 - pass_rate_2: 54.1 - percent_cases_well_formed: 100.0 - error_outputs: 0 - num_malformed_responses: 0 - 
num_with_malformed_responses: 0 - user_asks: 9 + model: ollama/krith/qwen2.5-coder-32b-instruct:IQ2_M + edit_format: diff + commit_hash: fbadfcf-dirty + pass_rate_1: 16.5 + pass_rate_2: 21.1 + percent_cases_well_formed: 60.9 + error_outputs: 1169 + num_malformed_responses: 148 + num_with_malformed_responses: 52 + user_asks: 58 lazy_comments: 0 - syntax_errors: 0 - indentation_errors: 0 + syntax_errors: 3 + indentation_errors: 1 exhausted_context_windows: 0 - test_timeouts: 3 - command: aider --model ollama/qwen2.5:32b - date: 2024-09-30 - versions: 0.58.1.dev - seconds_per_case: 134.9 - total_cost: 0.0000 - \ No newline at end of file + test_timeouts: 4 + command: aider --model ollama/krith/qwen2.5-coder-32b-instruct:IQ2_M + date: 2024-11-22 + versions: 0.64.2.dev + seconds_per_case: 169.7 + total_cost: 0.00 \ No newline at end of file diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md index ff3ff58b0..42e2831a4 100644 --- a/aider/website/_posts/2024-11-21-quantization.md +++ b/aider/website/_posts/2024-11-21-quantization.md @@ -24,16 +24,16 @@ and local model servers like Ollama. {% include quant-chart.js %} -The graph above compares 4 different versions of the Qwen 2.5 32B model, +The graph above compares 3 different versions of the Qwen 2.5 Coder 32B model, served both locally and from cloud providers. - The [HuggingFace weights](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) served via [glhf.chat](https://glhf.chat). - The results from [OpenRouter's mix of providers](https://openrouter.ai/qwen/qwen-2.5-coder-32b-instruct/providers) which serve the model with different levels of quantization. -- Two Ollama models run locally. - -The best version of the model rivals GPT-4o, while the worst performer -is more like GPT-3.5 Turbo. +- Ollama locally serving [qwen2.5-coder:32b-instruct-q4_K_M](https://ollama.com/library/qwen2.5-coder:32b-instruct-q4_K_M), which has `Q4_K_M` quantization. 
+- Ollama locally serving [krith/qwen2.5-coder-32b-instruct:IQ2_M](https://ollama.com/krith/qwen2.5-coder-32b-instruct), which has `IQ2_M` quantization. +The best version of the model rivals GPT-4o, while the worst performers +range from GPT-3.5 Turbo level to completely useless. ## Choosing providers with OpenRouter OpenRouter allows you to ignore specific providers in your [preferences](https://openrouter.ai/settings/preferences). This can be effective to exclude highly quantized or otherwise undesirable providers. + +{: .note } +The original version of this article included incorrect Ollama models +that were not Qwen 2.5 Coder 32B.