From 9c9eedd9c53a85daa7ca2228ee384c5066fd6244 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Mon, 26 May 2025 12:18:22 -0700 Subject: [PATCH] chore: Update polyglot leaderboard data for gemini-2.5-flash --- aider/website/_data/polyglot_leaderboard.yml | 37 ++++++++++---------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/aider/website/_data/polyglot_leaderboard.yml b/aider/website/_data/polyglot_leaderboard.yml index 6bd5896af..51676fc72 100644 --- a/aider/website/_data/polyglot_leaderboard.yml +++ b/aider/website/_data/polyglot_leaderboard.yml @@ -1421,34 +1421,35 @@ seconds_per_case: 44.1 total_cost: 65.7484 -- dirname: 2025-05-25-22-42-53--flash25-05-20 +- dirname: 2025-05-26-15-56-31--flash25-05-20-24k-think # dirname is misleading test_cases: 225 model: gemini-2.5-flash-preview-05-20 (no think) edit_format: diff - commit_hash: a8568c3 - pass_rate_1: 23.6 - pass_rate_2: 55.1 - pass_num_1: 53 - pass_num_2: 124 - percent_cases_well_formed: 96.4 - error_outputs: 8 - num_malformed_responses: 8 - num_with_malformed_responses: 8 - user_asks: 96 + commit_hash: 214b811-dirty + thinking_tokens: 0 # <-- no thinking + pass_rate_1: 20.9 + pass_rate_2: 44.0 + pass_num_1: 47 + pass_num_2: 99 + percent_cases_well_formed: 93.8 + error_outputs: 16 + num_malformed_responses: 16 + num_with_malformed_responses: 14 + user_asks: 79 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 - prompt_tokens: 4512205 - completion_tokens: 2991967 - test_timeouts: 2 + prompt_tokens: 5512458 + completion_tokens: 514145 + test_timeouts: 4 total_tests: 225 command: aider --model gemini/gemini-2.5-flash-preview-05-20 - date: 2025-05-25 + date: 2025-05-26 versions: 0.83.3.dev - seconds_per_case: 50.4 - total_cost: 7.6091 - + seconds_per_case: 12.2 + total_cost: 1.1354 + - dirname: 2025-05-25-22-58-44--flash25-05-20-24k-think test_cases: 225 model: gemini-2.5-flash-preview-05-20 (24k think)