diff --git a/aider/website/_data/polyglot_leaderboard.yml b/aider/website/_data/polyglot_leaderboard.yml index 92cac000a..561a25517 100644 --- a/aider/website/_data/polyglot_leaderboard.yml +++ b/aider/website/_data/polyglot_leaderboard.yml @@ -1279,30 +1279,31 @@ seconds_per_case: 372.2 total_cost: 0.7603 -- dirname: 2025-05-08-03-22-37--qwen3-235b-defaults +- dirname: 2025-05-09-17-02-02--qwen3-235b-a22b.unthink_16k_diff test_cases: 225 - model: Qwen3 235B A22B + model: Qwen3 235B A22B diff, no think, Alibaba API edit_format: diff - commit_hash: aaacee5-dirty - pass_rate_1: 17.3 - pass_rate_2: 49.8 - pass_num_1: 39 - pass_num_2: 112 - percent_cases_well_formed: 91.6 - error_outputs: 58 - num_malformed_responses: 29 - num_with_malformed_responses: 19 - user_asks: 102 + commit_hash: 91d7fbd-dirty + pass_rate_1: 28.9 + pass_rate_2: 59.6 + pass_num_1: 65 + pass_num_2: 134 + percent_cases_well_formed: 92.9 + error_outputs: 22 + num_malformed_responses: 22 + num_with_malformed_responses: 16 + user_asks: 111 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 - prompt_tokens: 0 - completion_tokens: 0 + prompt_tokens: 2816192 + completion_tokens: 342062 test_timeouts: 1 total_tests: 225 - command: aider --model openrouter/qwen/qwen3-235b-a22b - date: 2025-05-08 + command: aider --model openai/qwen3-235b-a22b + date: 2025-05-09 versions: 0.82.4.dev - seconds_per_case: 428.1 - total_cost: 1.8037 \ No newline at end of file + seconds_per_case: 45.4 + total_cost: 0.0000 + \ No newline at end of file diff --git a/aider/website/_data/qwen3_leaderboard.yml b/aider/website/_data/qwen3_leaderboard.yml index 5b8e3233d..68233c26f 100644 --- a/aider/website/_data/qwen3_leaderboard.yml +++ b/aider/website/_data/qwen3_leaderboard.yml @@ -217,7 +217,7 @@ - dirname: 2025-05-09-17-02-02--qwen3-235b-a22b.unthink_16k_diff test_cases: 225 - model: Qwen3 235B A22B no think, via official Alibaba API + model: Qwen3 235B A22B diff, no think, via official Alibaba API edit_format: diff commit_hash: 91d7fbd-dirty pass_rate_1: 28.9 @@ -241,4 +241,32 @@ date: 2025-05-09 versions: 0.82.4.dev seconds_per_case: 45.4 + total_cost: 0.0000 + +- dirname: 2025-05-09-23-01-22--qwen3-235b-a22b.unthink_16k_whole + test_cases: 225 + model: Qwen3 235B A22B whole, no think, via official Alibaba API + edit_format: whole + commit_hash: 425fb6d + pass_rate_1: 26.7 + pass_rate_2: 61.8 + pass_num_1: 60 + pass_num_2: 139 + percent_cases_well_formed: 100.0 + error_outputs: 0 + num_malformed_responses: 0 + num_with_malformed_responses: 0 + user_asks: 175 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + prompt_tokens: 2768173 + completion_tokens: 384000 + test_timeouts: 1 + total_tests: 225 + command: aider --model openai/qwen3-235b-a22b + date: 2025-05-09 + versions: 0.82.4.dev + seconds_per_case: 50.8 total_cost: 0.0000 \ No newline at end of file