From 3a331e55dc2f8fef342d20caaa3f78c2447fcfab Mon Sep 17 00:00:00 2001 From: ivanfioravanti Date: Sat, 23 Nov 2024 17:32:08 +0100 Subject: [PATCH 1/3] mlx 4bit diff --- aider/website/_data/quant.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml index baaefa658..806eafa5a 100644 --- a/aider/website/_data/quant.yml +++ b/aider/website/_data/quant.yml @@ -70,25 +70,25 @@ - dirname: 2024-11-22-17-53-35--qwen25-coder-32b-Instruct-4bit test_cases: 133 - model: mlx-community/Qwen2.5-Coder-32B-Instruct-4bit (whole) - edit_format: whole - commit_hash: 0ccf04a-dirty - pass_rate_1: 57.1 - pass_rate_2: 69.2 - percent_cases_well_formed: 100.0 - error_outputs: 70 - num_malformed_responses: 0 - num_with_malformed_responses: 0 - user_asks: 0 + model: mlx-community/Qwen2.5-Coder-32B-Instruct-4bit + edit_format: diff + commit_hash: a16dcab-dirty + pass_rate_1: 60.2 + pass_rate_2: 72.2 + percent_cases_well_formed: 88.7 + error_outputs: 31 + num_malformed_responses: 30 + num_with_malformed_responses: 15 + user_asks: 6 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 - exhausted_context_windows: 0 + exhausted_context_windows: 1 test_timeouts: 0 command: aider --model openai/mlx-community/Qwen2.5-Coder-32B-Instruct-4bit - date: 2024-11-22 + date: 2024-11-23 versions: 0.64.2.dev - seconds_per_case: 173.7 + seconds_per_case: 53.4 total_cost: 0.0000 - dirname: 2024-11-20-15-17-37--qwen25-32b-or-diff From 100744a952bd61ed4bbfea4cca9994f902493b8d Mon Sep 17 00:00:00 2001 From: ivanfioravanti Date: Sat, 23 Nov 2024 17:34:01 +0100 Subject: [PATCH 2/3] Article updated to reflect change in mlx test --- aider/website/_posts/2024-11-21-quantization.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md index 2d8391ac5..3fff8dd62 100644 --- a/aider/website/_posts/2024-11-21-quantization.md +++ b/aider/website/_posts/2024-11-21-quantization.md @@ -16,7 +16,7 @@ aider's code editing benchmark, rivaling closed source frontier models. But pay attention to how your model is being quantized, as it can strongly impact code editing skill. Heavily quantized models are often used by cloud API providers -and local model servers like Ollama. +and local model servers like Ollama or MLX. @@ -29,9 +29,7 @@ served both locally and from cloud providers. - The [HuggingFace BF16 weights](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) served via [glhf.chat](https://glhf.chat). - Hyperbolic labs API for [qwen2-5-coder-32b-instruct](https://app.hyperbolic.xyz/models/qwen2-5-coder-32b-instruct), which is using BF16. This result is probably within the expected variance of the HF result. -- A [4bit quant for mlx](https://t.co/cwX3DYX35D). -This is the only model which was benchmarked using the "whole" [edit format](https://aider.chat/docs/more/edit-formats.html). -The rest were benchmarked with the much more practical and challenging "diff"edit format. +- A [4bit quant for mlx](https://t.co/cwX3DYX35D). - The results from [OpenRouter's mix of providers](https://openrouter.ai/qwen/qwen-2.5-coder-32b-instruct/providers) which serve the model with different levels of quantization. - Ollama locally serving [qwen2.5-coder:32b-instruct-q4_K_M)](https://ollama.com/library/qwen2.5-coder:32b-instruct-q4_K_M), which has `Q4_K_M` quantization, with Ollama's default 2k context window. From 324430a6965a6ee078bfc74a1f9d5e7cb9fc5c88 Mon Sep 17 00:00:00 2001 From: ivanfioravanti Date: Sat, 23 Nov 2024 19:51:48 +0100 Subject: [PATCH 3/3] quant.yml mlx-community/Qwen2.5-Coder-32B-Instruct-8bit added --- aider/website/_data/quant.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml index 806eafa5a..0d96c54b6 100644 --- a/aider/website/_data/quant.yml +++ b/aider/website/_data/quant.yml @@ -91,6 +91,29 @@ seconds_per_case: 53.4 total_cost: 0.0000 +- dirname: 2024-11-23-15-07-20--qwen25-coder-32b-Instruct-8bit + test_cases: 133 + model: mlx-community/Qwen2.5-Coder-32B-Instruct-8bit + edit_format: diff + commit_hash: a16dcab-dirty + pass_rate_1: 59.4 + pass_rate_2: 72.2 + percent_cases_well_formed: 92.5 + error_outputs: 20 + num_malformed_responses: 15 + num_with_malformed_responses: 10 + user_asks: 7 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 5 + test_timeouts: 2 + command: aider --model openai/mlx-community/Qwen2.5-Coder-32B-Instruct-8bit + date: 2024-11-23 + versions: 0.64.2.dev + seconds_per_case: 98.4 + total_cost: 0.0000 + - dirname: 2024-11-20-15-17-37--qwen25-32b-or-diff test_cases: 133 model: OpenRouter (mixed)