copy

2025-05-31 01:35:00 +00:00 · 2025-05-08 11:41:51 -07:00 · 2025-05-08 11:41:51 -07:00 · acd7309b78
commit acd7309b78
parent d5ea078f24
2 changed files with 13 additions and 10 deletions
--- a/aider/website/_data/qwen3_leaderboard.yml
+++ b/aider/website/_data/qwen3_leaderboard.yml
@ -1,6 +1,6 @@
 - dirname: 2025-05-08-03-20-24--qwen3-32b-default
  test_cases: 225
-  model: Qwen3 32B on OpenRouter, all providers, default settings (thinking)
+  model: Qwen3 32B diff on OpenRouter, all providers, default settings (thinking)
  edit_format: diff
  commit_hash: aaacee5-dirty, aeaf259
  pass_rate_1: 14.2
@ -28,7 +28,7 @@

 - dirname: 2025-05-08-03-22-37--qwen3-235b-defaults
  test_cases: 225
-  model: Qwen3 235B A22B on OpenRouter, all providers, default settings (thinking)
+  model: Qwen3 235B A22B diff on OpenRouter, all providers, default settings (thinking)
  edit_format: diff
  commit_hash: aaacee5-dirty
  pass_rate_1: 17.3
@ -57,7 +57,7 @@

 - dirname: 2025-05-08-17-39-14--qwen3-235b-or-together-only
  test_cases: 225
-  model: Qwen3 235B A22B on OpenRouter only TogetherAI, recommended /no_think settings
+  model: Qwen3 235B A22B diff on OpenRouter only TogetherAI, recommended /no_think settings
  edit_format: diff
  commit_hash: 328584e
  pass_rate_1: 28.0
@ -86,7 +86,7 @@

 - dirname: 2025-04-30-04-49-37--Qwen3-235B-A22B-whole-nothink
  test_cases: 225
-  model: Qwen3-235B-A22B with VLLM, bfloat16, recommended /no_think settings
+  model: Qwen3-235B-A22B whole with VLLM, bfloat16, recommended /no_think settings
  edit_format: whole
  commit_hash: 0c383df-dirty
  pass_rate_1: 28.0
@ -112,7 +112,7 @@

 - dirname: 2025-04-30-04-49-50--Qwen3-235B-A22B-diff-nothink
  test_cases: 225
-  model: Qwen3-235B-A22B with VLLM, bfloat16, recommended /no_think settings
+  model: Qwen3-235B-A22B diff with VLLM, bfloat16, recommended /no_think settings
  edit_format: diff
  commit_hash: 0c383df-dirty
  pass_rate_1: 29.8
@ -138,7 +138,7 @@

 - dirname: 2025-04-30-04-08-41--Qwen3-32B-whole-nothink
  test_cases: 225
-  model: Qwen3-32B with VLLM, bfloat16, recommended /no_think settings
+  model: Qwen3-32B whole with VLLM, bfloat16, recommended /no_think settings
  edit_format: whole
  commit_hash: 0c383df-dirty
  pass_rate_1: 20.4
@ -164,7 +164,7 @@

 - dirname: 2025-04-30-04-08-51--Qwen3-32B-diff-nothink
  test_cases: 225
-  model: Qwen3-32B with VLLM, bfloat16, recommended /no_think settings
+  model: Qwen3-32B diff with VLLM, bfloat16, recommended /no_think settings
  edit_format: diff
  commit_hash: 0c383df-dirty
  pass_rate_1: 20.4
--- a/aider/website/_posts/2025-05-08-qwen3.md
+++ b/aider/website/_posts/2025-05-08-qwen3.md
@ -14,10 +14,13 @@ Open source models are wonderful because anyone can serve them,
 but API providers can use very different inference settings, quantizations, etc.

 Below is a collection of aider polyglot benchmark results for the new Qwen3 models.
-Results are presented with various settings against various API providers,
-with the hope of showcasing the strengths of these models and their providers.
+Results are presented using both "diff" and "whole" 
+[edit formats](https://aider.chat/docs/more/edit-formats.html),
+with various model settings, against various API providers.

-See details for configuring Qwen3 after the results table.
+See details on the 
+[model settings](https://aider.chat/docs/config/adv-model-settings.html#model-settings) 
+used after the results table.

 {: .note }
 This article is being updated as new results become available.