diff --git a/aider/website/_data/qwen3_leaderboard.yml b/aider/website/_data/qwen3_leaderboard.yml index faa20fab7..e773bdee2 100644 --- a/aider/website/_data/qwen3_leaderboard.yml +++ b/aider/website/_data/qwen3_leaderboard.yml @@ -1,6 +1,6 @@ - dirname: 2025-05-08-03-20-24--qwen3-32b-default test_cases: 225 - model: Qwen3 32B on OpenRouter, all providers, default settings (thinking) + model: Qwen3 32B diff on OpenRouter, all providers, default settings (thinking) edit_format: diff commit_hash: aaacee5-dirty, aeaf259 pass_rate_1: 14.2 @@ -28,7 +28,7 @@ - dirname: 2025-05-08-03-22-37--qwen3-235b-defaults test_cases: 225 - model: Qwen3 235B A22B on OpenRouter, all providers, default settings (thinking) + model: Qwen3 235B A22B diff on OpenRouter, all providers, default settings (thinking) edit_format: diff commit_hash: aaacee5-dirty pass_rate_1: 17.3 @@ -57,7 +57,7 @@ - dirname: 2025-05-08-17-39-14--qwen3-235b-or-together-only test_cases: 225 - model: Qwen3 235B A22B on OpenRouter only TogetherAI, recommended /no_think settings + model: Qwen3 235B A22B diff on OpenRouter only TogetherAI, recommended /no_think settings edit_format: diff commit_hash: 328584e pass_rate_1: 28.0 @@ -86,7 +86,7 @@ - dirname: 2025-04-30-04-49-37--Qwen3-235B-A22B-whole-nothink test_cases: 225 - model: Qwen3-235B-A22B with VLLM, bfloat16, recommended /no_think settings + model: Qwen3-235B-A22B whole with VLLM, bfloat16, recommended /no_think settings edit_format: whole commit_hash: 0c383df-dirty pass_rate_1: 28.0 @@ -112,7 +112,7 @@ - dirname: 2025-04-30-04-49-50--Qwen3-235B-A22B-diff-nothink test_cases: 225 - model: Qwen3-235B-A22B with VLLM, bfloat16, recommended /no_think settings + model: Qwen3-235B-A22B diff with VLLM, bfloat16, recommended /no_think settings edit_format: diff commit_hash: 0c383df-dirty pass_rate_1: 29.8 @@ -138,7 +138,7 @@ - dirname: 2025-04-30-04-08-41--Qwen3-32B-whole-nothink test_cases: 225 - model: Qwen3-32B with VLLM, bfloat16, recommended /no_think settings + 
model: Qwen3-32B whole with VLLM, bfloat16, recommended /no_think settings edit_format: whole commit_hash: 0c383df-dirty pass_rate_1: 20.4 @@ -164,7 +164,7 @@ - dirname: 2025-04-30-04-08-51--Qwen3-32B-diff-nothink test_cases: 225 - model: Qwen3-32B with VLLM, bfloat16, recommended /no_think settings + model: Qwen3-32B diff with VLLM, bfloat16, recommended /no_think settings edit_format: diff commit_hash: 0c383df-dirty pass_rate_1: 20.4 diff --git a/aider/website/_posts/2025-05-08-qwen3.md b/aider/website/_posts/2025-05-08-qwen3.md index 01a544b25..79d53ea8d 100644 --- a/aider/website/_posts/2025-05-08-qwen3.md +++ b/aider/website/_posts/2025-05-08-qwen3.md @@ -14,10 +14,13 @@ Open source models are wonderful because anyone can serve them, but API providers can use very different inference settings, quantizations, etc. Below are collection of aider polyglot benchmark results for the new Qwen3 models. -Results are presented with various settings against various API providers, -with the hope of showcasing the strengths of these models and their providers. +Results are presented using both "diff" and "whole" +[edit formats](https://aider.chat/docs/more/edit-formats.html), +with various model settings, against various API providers. -See details for configuring Qwen3 after the results table. +See details on the +[model settings](https://aider.chat/docs/config/adv-model-settings.html#model-settings) +used after the results table. {: .note } This article is being updated as new results become available.