mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-31 17:55:01 +00:00
copy
This commit is contained in:
parent
43dd9ef8a5
commit
8776830306
3 changed files with 241 additions and 28 deletions
|
@ -1412,27 +1412,27 @@
|
|||
use_repo_map: true
|
||||
weak_model_name: openrouter/google/gemini-2.0-flash-001
|
||||
|
||||
- name: openrouter/qwen/qwen3-235b-a22b
|
||||
system_prompt_prefix: "/no_think"
|
||||
use_temperature: 0.7
|
||||
extra_params:
|
||||
max_tokens: 24000
|
||||
top_p: 0.8
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
temperature: 0.7
|
||||
extra_body:
|
||||
provider:
|
||||
order: ["Together"]
|
||||
#- name: openrouter/qwen/qwen3-235b-a22b
|
||||
# system_prompt_prefix: "/no_think"
|
||||
# use_temperature: 0.7
|
||||
# extra_params:
|
||||
# max_tokens: 24000
|
||||
# top_p: 0.8
|
||||
# top_k: 20
|
||||
# min_p: 0.0
|
||||
# temperature: 0.7
|
||||
# extra_body:
|
||||
# provider:
|
||||
# order: ["Together"]
|
||||
|
||||
- name: together_ai/Qwen/Qwen3-235B-A22B-fp8-tput
|
||||
system_prompt_prefix: "/no_think"
|
||||
use_temperature: 0.7
|
||||
reasoning_tag: think
|
||||
extra_params:
|
||||
max_tokens: 24000
|
||||
top_p: 0.8
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
temperature: 0.7
|
||||
#- name: together_ai/Qwen/Qwen3-235B-A22B-fp8-tput
|
||||
# system_prompt_prefix: "/no_think"
|
||||
# use_temperature: 0.7
|
||||
# reasoning_tag: think
|
||||
# extra_params:
|
||||
# max_tokens: 24000
|
||||
# top_p: 0.8
|
||||
# top_k: 20
|
||||
# min_p: 0.0
|
||||
# temperature: 0.7
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
- dirname: 2025-05-08-03-20-24--qwen3-32b-default
|
||||
test_cases: 225
|
||||
model: Qwen3 32B
|
||||
model: Qwen3 32B on OpenRouter, all providers, default settings (thinking)
|
||||
edit_format: diff
|
||||
commit_hash: aaacee5-dirty, aeaf259
|
||||
pass_rate_1: 14.2
|
||||
|
@ -28,7 +28,7 @@
|
|||
|
||||
- dirname: 2025-05-08-03-22-37--qwen3-235b-defaults
|
||||
test_cases: 225
|
||||
model: Qwen3 235B A22B
|
||||
model: Qwen3 235B A22B on OpenRouter, all providers, default settings (thinking)
|
||||
edit_format: diff
|
||||
commit_hash: aaacee5-dirty
|
||||
pass_rate_1: 17.3
|
||||
|
@ -53,3 +53,137 @@
|
|||
versions: 0.82.4.dev
|
||||
seconds_per_case: 428.1
|
||||
total_cost: 1.8037
|
||||
|
||||
|
||||
- dirname: 2025-05-08-17-39-14--qwen3-235b-or-together-only
|
||||
test_cases: 225
|
||||
model: Qwen3 235B A22B on OpenRouter only TogetherAI, recommended /no_think settings
|
||||
edit_format: diff
|
||||
commit_hash: 328584e
|
||||
pass_rate_1: 28.0
|
||||
pass_rate_2: 54.7
|
||||
pass_num_1: 63
|
||||
pass_num_2: 123
|
||||
percent_cases_well_formed: 90.7
|
||||
error_outputs: 39
|
||||
num_malformed_responses: 32
|
||||
num_with_malformed_responses: 21
|
||||
user_asks: 106
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
prompt_tokens: 2816606
|
||||
completion_tokens: 362346
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: aider --model openrouter/qwen/qwen3-235b-a22b
|
||||
date: 2025-05-08
|
||||
versions: 0.82.4.dev
|
||||
seconds_per_case: 77.2
|
||||
total_cost: 0.6399
|
||||
|
||||
|
||||
- dirname: 2025-04-30-04-49-37--Qwen3-235B-A22B-whole-nothink
|
||||
test_cases: 225
|
||||
model: Qwen3-235B-A22B with VLLM, bfloat16, recommended /no_think settings
|
||||
edit_format: whole
|
||||
commit_hash: 0c383df-dirty
|
||||
pass_rate_1: 28.0
|
||||
pass_rate_2: 65.3
|
||||
pass_num_1: 63
|
||||
pass_num_2: 147
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 3
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 166
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 3
|
||||
test_timeouts: 0
|
||||
total_tests: 225
|
||||
command: aider --model openai/Qwen3-235B-A22B
|
||||
date: 2025-04-30
|
||||
versions: 0.81.4.dev
|
||||
seconds_per_case: 166.0
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2025-04-30-04-49-50--Qwen3-235B-A22B-diff-nothink
|
||||
test_cases: 225
|
||||
model: Qwen3-235B-A22B with VLLM, bfloat16, recommended /no_think settings
|
||||
edit_format: diff
|
||||
commit_hash: 0c383df-dirty
|
||||
pass_rate_1: 29.8
|
||||
pass_rate_2: 61.3
|
||||
pass_num_1: 67
|
||||
pass_num_2: 138
|
||||
percent_cases_well_formed: 94.7
|
||||
error_outputs: 25
|
||||
num_malformed_responses: 25
|
||||
num_with_malformed_responses: 12
|
||||
user_asks: 97
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: aider --model openai/Qwen3-235B-A22B
|
||||
date: 2025-04-30
|
||||
versions: 0.81.4.dev
|
||||
seconds_per_case: 158.2
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2025-04-30-04-08-41--Qwen3-32B-whole-nothink
|
||||
test_cases: 225
|
||||
model: Qwen3-32B with VLLM, bfloat16, recommended /no_think settings
|
||||
edit_format: whole
|
||||
commit_hash: 0c383df-dirty
|
||||
pass_rate_1: 20.4
|
||||
pass_rate_2: 45.8
|
||||
pass_num_1: 46
|
||||
pass_num_2: 103
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 3
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 94
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 3
|
||||
test_timeouts: 5
|
||||
total_tests: 225
|
||||
command: aider --model openai/Qwen3-32B
|
||||
date: 2025-04-30
|
||||
versions: 0.81.4.dev
|
||||
seconds_per_case: 48.1
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2025-04-30-04-08-51--Qwen3-32B-diff-nothink
|
||||
test_cases: 225
|
||||
model: Qwen3-32B with VLLM, bfloat16, recommended /no_think settings
|
||||
edit_format: diff
|
||||
commit_hash: 0c383df-dirty
|
||||
pass_rate_1: 20.4
|
||||
pass_rate_2: 41.3
|
||||
pass_num_1: 46
|
||||
pass_num_2: 93
|
||||
percent_cases_well_formed: 94.2
|
||||
error_outputs: 17
|
||||
num_malformed_responses: 14
|
||||
num_with_malformed_responses: 13
|
||||
user_asks: 83
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 3
|
||||
test_timeouts: 4
|
||||
total_tests: 225
|
||||
command: aider --model openai/Qwen3-32B
|
||||
date: 2025-04-30
|
||||
versions: 0.81.4.dev
|
||||
seconds_per_case: 59.4
|
||||
total_cost: 0.0000
|
|
@ -1,13 +1,29 @@
|
|||
---
|
||||
layout: post
|
||||
title: Qwen3 Benchmark Results
|
||||
title: Qwen3 benchmark results
|
||||
excerpt: "Benchmark results for Qwen3 models using the Aider polyglot coding benchmark."
|
||||
date: 2025-05-08
|
||||
---
|
||||
|
||||
You can add some introductory text for your blog post here.
|
||||
# Qwen3 results on the aider polyglot benchmark
|
||||
|
||||
<h2 id="leaderboard-title">Qwen3 polyglot coding leaderboard</h2>
|
||||
As [previously discussed when Qwen2.5 was released](/2024/11/21/quantization.html),
|
||||
details matter when working with open source models for AI coding.
|
||||
Proprietary models are served by their creators or trusted providers with stable inference settings.
|
||||
Open source models are wonderful because anyone can serve them,
|
||||
but API providers can use very different inference settings, quantizations, etc.
|
||||
|
||||
Below are collection of aider polyglot benchmark results for the new Qwen3 models.
|
||||
Results are presented with various settings against various API providers,
|
||||
with the hope of showcasing the strengths of these models and its providers.
|
||||
|
||||
{: .note }
|
||||
This article is being updated as new results become available.
|
||||
|
||||
|
||||
|
||||
|
||||
<h2 id="leaderboard-title">Qwen3 results on the aider polyglot benchmark</h2>
|
||||
|
||||
<div id="controls-container" style="display: flex; align-items: center; width: 100%; max-width: 800px; margin: 10px auto; gap: 10px; box-sizing: border-box; padding: 0 5px; position: relative;">
|
||||
<input type="text" id="editSearchInput" placeholder="Search..." style="flex-grow: 1; padding: 8px; border: 1px solid #ddd; border-radius: 4px;">
|
||||
|
@ -252,6 +268,69 @@ You can add some introductory text for your blog post here.
|
|||
</style>
|
||||
|
||||
<script>
|
||||
const LEADERBOARD_CUSTOM_TITLE = "Aider polyglot coding benchmark results (selected)";
|
||||
const LEADERBOARD_CUSTOM_TITLE = "Qwen3 results on the aider polyglot benchmark";
|
||||
{% include leaderboard_table.js %}
|
||||
</script>
|
||||
|
||||
|
||||
## OpenRouter only TogetherAI, recommended /no_think settings
|
||||
|
||||
These results were obtained with the
|
||||
[recommended](https://huggingface.co/Qwen/Qwen3-235B-A22B#best-practices)
|
||||
non-thinking model settings in `.aider.model.settings.yml`:
|
||||
|
||||
```yaml
|
||||
- name: openrouter/qwen/qwen3-235b-a22b
|
||||
system_prompt_prefix: "/no_think"
|
||||
use_temperature: 0.7
|
||||
extra_params:
|
||||
max_tokens: 24000
|
||||
top_p: 0.8
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
temperature: 0.7
|
||||
extra_body:
|
||||
provider:
|
||||
order: ["Together"]
|
||||
```
|
||||
|
||||
And then running aider:
|
||||
|
||||
```bash
|
||||
aider --model openrouter/qwen/qwen3-235b-a22b
|
||||
```
|
||||
|
||||
|
||||
## OpenRouter, all providers, default settings (thinking)
|
||||
|
||||
These results were obtained by simply running aider as shown below, without any model specific settings.
|
||||
This should have enabled thinking, assuming upstream API providers honor that convention for Qwen3.
|
||||
|
||||
```bash
|
||||
aider --model openrouter/qwen/qwen3-xxx
|
||||
```
|
||||
|
||||
## VLLM, bfloat16, recommended /no_think
|
||||
|
||||
These [benchmarks results were obtained by GitHub user AlongWY](https://github.com/Aider-AI/aider/pull/3908)
|
||||
with the
|
||||
[recommended](https://huggingface.co/Qwen/Qwen3-235B-A22B#best-practices)
|
||||
non-thinking model settings in `.aider.model.settings.yml`:
|
||||
|
||||
```yaml
|
||||
- name: openai/<model-name>
|
||||
system_prompt_prefix: "/no_think"
|
||||
use_temperature: 0.7
|
||||
extra_params:
|
||||
max_tokens: 24000
|
||||
top_p: 0.8
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
temperature: 0.7
|
||||
```
|
||||
|
||||
And then running aider:
|
||||
|
||||
```bash
|
||||
aider --model openai/<model-name> --openai-api-base <url>
|
||||
```
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue