From 73de0ea8beb15fc373c7bb722900bcfd3545e115 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sat, 23 Nov 2024 20:18:19 -0800 Subject: [PATCH] copy --- aider/website/_data/quant.yml | 25 ++++++++++++++++++- .../website/_posts/2024-11-21-quantization.md | 1 - 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/aider/website/_data/quant.yml b/aider/website/_data/quant.yml index ac177aa4f..3445144e9 100644 --- a/aider/website/_data/quant.yml +++ b/aider/website/_data/quant.yml @@ -181,4 +181,27 @@ date: 2024-11-24 versions: 0.64.2.dev seconds_per_case: 17.5 - total_cost: 0.0000 \ No newline at end of file + total_cost: 0.0000 + +- dirname: 2024-11-24-04-12-58--fireworks-qwen-diff + test_cases: 133 + model: Fireworks (unknown quant) + edit_format: diff + commit_hash: 757eac0 + pass_rate_1: 57.9 + pass_rate_2: 72.2 + percent_cases_well_formed: 94.0 + error_outputs: 23 + num_malformed_responses: 19 + num_with_malformed_responses: 8 + user_asks: 8 + lazy_comments: 0 + syntax_errors: 6 + indentation_errors: 0 + exhausted_context_windows: 4 + test_timeouts: 1 + command: aider --model fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct + date: 2024-11-24 + versions: 0.64.2.dev + seconds_per_case: 10.4 + total_cost: 0.5759 \ No newline at end of file diff --git a/aider/website/_posts/2024-11-21-quantization.md b/aider/website/_posts/2024-11-21-quantization.md index c059a892f..6f5104b94 100644 --- a/aider/website/_posts/2024-11-21-quantization.md +++ b/aider/website/_posts/2024-11-21-quantization.md @@ -28,7 +28,6 @@ The graph above compares different versions of the Qwen 2.5 Coder 32B Instruct m served both locally and from cloud providers. - The [HuggingFace BF16 weights](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) served via [glhf.chat](https://glhf.chat). -- Hyperbolic labs API for [qwen2-5-coder-32b-instruct](https://app.hyperbolic.xyz/models/qwen2-5-coder-32b-instruct), which is using BF16. This result is probably within the expected variance of the HF result. - [4bit and 8bit quants for mlx](https://t.co/cwX3DYX35D). - The results from [OpenRouter's mix of providers](https://openrouter.ai/qwen/qwen-2.5-coder-32b-instruct/providers) which serve the model with different levels of quantization. - Ollama locally serving different quantizations from the [Ollama model library](https://ollama.com/library/qwen2.5-coder:32b-instruct-q4_K_M).