diff --git a/aider/website/_data/polyglot_leaderboard.yml b/aider/website/_data/polyglot_leaderboard.yml index 2572c3dc7..657e5ea04 100644 --- a/aider/website/_data/polyglot_leaderboard.yml +++ b/aider/website/_data/polyglot_leaderboard.yml @@ -909,4 +909,82 @@ date: 2025-04-06 versions: 0.81.2.dev seconds_per_case: 20.5 + total_cost: 0.0000 + +- dirname: 2025-04-10-04-21-31--grok3-diff-exuser + test_cases: 225 + model: Grok 3 Beta + edit_format: diff + commit_hash: 2dd40fc-dirty + pass_rate_1: 22.2 + pass_rate_2: 53.3 + pass_num_1: 50 + pass_num_2: 120 + percent_cases_well_formed: 99.6 + error_outputs: 1 + num_malformed_responses: 1 + num_with_malformed_responses: 1 + user_asks: 68 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 2 + total_tests: 225 + command: aider --model openrouter/x-ai/grok-3-beta + date: 2025-04-10 + versions: 0.81.2.dev + seconds_per_case: 15.3 + total_cost: 11.0338 + +- dirname: 2025-04-10-18-47-24--grok3-mini-whole-exuser + test_cases: 225 + model: Grok 3 Mini Beta + edit_format: whole + commit_hash: 14ffe77-dirty + pass_rate_1: 11.1 + pass_rate_2: 34.7 + pass_num_1: 25 + pass_num_2: 78 + percent_cases_well_formed: 100.0 + error_outputs: 3 + num_malformed_responses: 0 + num_with_malformed_responses: 0 + user_asks: 73 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 5 + total_tests: 225 + command: aider --model openrouter/x-ai/grok-3-mini-beta + date: 2025-04-10 + versions: 0.81.2.dev + seconds_per_case: 35.1 + total_cost: 0.7856 + +- dirname: 2025-04-10-19-02-44--oalpha-diff-exsys + test_cases: 225 + model: Optimus Alpha + edit_format: diff + commit_hash: 532bc45-dirty + pass_rate_1: 21.3 + pass_rate_2: 52.9 + pass_num_1: 48 + pass_num_2: 119 + percent_cases_well_formed: 97.3 + error_outputs: 7 + num_malformed_responses: 6 + num_with_malformed_responses: 6 + user_asks: 182 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 3 + total_tests: 225 + command: aider --model openrouter/openrouter/optimus-alpha + date: 2025-04-10 + versions: 0.81.2.dev + seconds_per_case: 18.4 total_cost: 0.0000 \ No newline at end of file