diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml
index 0d96c54b6..cb5906334 100644
--- a/aider/website/_data/quant.yml
+++ b/aider/website/_data/quant.yml
@@ -24,7 +24,7 @@
 
 - dirname: 2024-11-22-18-56-13--ollama-qwen2.5-coder:32b-instruct-fp16
   test_cases: 132
-  model: ollama/qwen2.5-coder:32b-instruct-fp16 (64k context)
+  model: Ollama fp16
   edit_format: diff
   commit_hash: f06452c-dirty, 6a0a97c-dirty, 4e9ae16-dirty, 5506d0f-dirty
   pass_rate_1: 58.3
@@ -70,7 +70,7 @@
 
 - dirname: 2024-11-22-17-53-35--qwen25-coder-32b-Instruct-4bit
   test_cases: 133
-  model: mlx-community/Qwen2.5-Coder-32B-Instruct-4bit
+  model: mlx-community 4bit
   edit_format: diff
   commit_hash: a16dcab-dirty
   pass_rate_1: 60.2
@@ -93,7 +93,7 @@
 
 - dirname: 2024-11-23-15-07-20--qwen25-coder-32b-Instruct-8bit
   test_cases: 133
-  model: mlx-community/Qwen2.5-Coder-32B-Instruct-8bit
+  model: mlx-community 8bit
   edit_format: diff
   commit_hash: a16dcab-dirty
   pass_rate_1: 59.4
@@ -137,26 +137,25 @@
   seconds_per_case: 40.7
   total_cost: 0.1497
 
-- dirname: 2024-11-21-23-33-47--ollama-qwen25-coder
+- dirname: 2024-11-23-21-08-53--ollama-qwen2.5-coder:32b-instruct-q4_K_M-8kctx
   test_cases: 133
-  model: Ollama Q4_K_M
+  model: Ollama q4_K_M
   edit_format: diff
-  commit_hash: 488c88d-dirty
-  pass_rate_1: 44.4
-  pass_rate_2: 53.4
-  percent_cases_well_formed: 44.4
-  error_outputs: 231
-  num_malformed_responses: 183
-  num_with_malformed_responses: 74
-  user_asks: 79
+  commit_hash: baa1335-dirty, e63df83-dirty, ff8c1aa-dirty
+  pass_rate_1: 54.9
+  pass_rate_2: 66.9
+  percent_cases_well_formed: 94.0
+  error_outputs: 21
+  num_malformed_responses: 21
+  num_with_malformed_responses: 8
+  user_asks: 5
   lazy_comments: 0
-  syntax_errors: 2
+  syntax_errors: 0
   indentation_errors: 0
   exhausted_context_windows: 0
-  test_timeouts: 2
+  test_timeouts: 3
   command: aider --model ollama/qwen2.5-coder:32b-instruct-q4_K_M
-  date: 2024-11-21
+  date: 2024-11-23
   versions: 0.64.2.dev
-  seconds_per_case: 86.7
-  total_cost: 0.0000
-
+  seconds_per_case: 35.7
+  total_cost: 0.0000
\ No newline at end of file
diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md
index 3fff8dd62..3d9372194 100644
--- a/aider/website/_posts/2024-11-21-quantization.md
+++ b/aider/website/_posts/2024-11-21-quantization.md
@@ -18,7 +18,7 @@ can strongly impact code editing skill.
 Heavily quantized models are often used
 by cloud API providers and local model servers like Ollama or MLX.
 
-
+
-## Setting the context window size
+## Setting Ollama's context window size
 
 [Ollama uses a 2k context window by default](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-specify-the-context-window-size),
 which is very small for working with aider.
+All of the Ollama results above were collected with at least an 8k context window, which
+is large enough to attempt all the coding problems in the benchmark.
+
 You can set the Ollama server's context window with a
 [`.aider.model.settings.yml` file](https://aider.chat/docs/config/adv-model-settings.html#model-settings)
 like this:
@@ -112,7 +115,7 @@ like this:
 ```
 - name: aider/extra_params
   extra_params:
-    num_ctx: 65536
+    num_ctx: 8192
 ```
 
 That uses the special model name `aider/extra_params` to set it for *all* models. You should probably use a specific model name like:
@@ -120,7 +123,7 @@ That uses the special model name `aider/extra_params` to set it for *all* models
 ```
 - name: ollama/qwen2.5-coder:32b-instruct-fp16
   extra_params:
-    num_ctx: 65536
+    num_ctx: 8192
 ```
 
 ## Choosing providers with OpenRouter
@@ -130,3 +133,8 @@ OpenRouter allows you to ignore specific providers in your
 [OpenRouter dashboard](https://openrouter.ai/settings/preferences).
 This can be effective to exclude highly quantized
 or otherwise undesirable providers.
+
+{: .note }
+Earlier versions of this article included incorrect Ollama models,
+and also included some Ollama results with the too small default 2k
+context window.