chore: Update polyglot leaderboard data for gemini-2.5-flash

This commit is contained in:
Paul Gauthier 2025-05-26 12:18:22 -07:00 committed by Paul Gauthier (aider)
parent acebc11237
commit 9c9eedd9c5

View file

@@ -1421,33 +1421,34 @@
seconds_per_case: 44.1 seconds_per_case: 44.1
total_cost: 65.7484 total_cost: 65.7484
- dirname: 2025-05-25-22-42-53--flash25-05-20 - dirname: 2025-05-26-15-56-31--flash25-05-20-24k-think # dirname is misleading
test_cases: 225 test_cases: 225
model: gemini-2.5-flash-preview-05-20 (no think) model: gemini-2.5-flash-preview-05-20 (no think)
edit_format: diff edit_format: diff
commit_hash: a8568c3 commit_hash: 214b811-dirty
pass_rate_1: 23.6 thinking_tokens: 0 # <-- no thinking
pass_rate_2: 55.1 pass_rate_1: 20.9
pass_num_1: 53 pass_rate_2: 44.0
pass_num_2: 124 pass_num_1: 47
percent_cases_well_formed: 96.4 pass_num_2: 99
error_outputs: 8 percent_cases_well_formed: 93.8
num_malformed_responses: 8 error_outputs: 16
num_with_malformed_responses: 8 num_malformed_responses: 16
user_asks: 96 num_with_malformed_responses: 14
user_asks: 79
lazy_comments: 0 lazy_comments: 0
syntax_errors: 0 syntax_errors: 0
indentation_errors: 0 indentation_errors: 0
exhausted_context_windows: 0 exhausted_context_windows: 0
prompt_tokens: 4512205 prompt_tokens: 5512458
completion_tokens: 2991967 completion_tokens: 514145
test_timeouts: 2 test_timeouts: 4
total_tests: 225 total_tests: 225
command: aider --model gemini/gemini-2.5-flash-preview-05-20 command: aider --model gemini/gemini-2.5-flash-preview-05-20
date: 2025-05-25 date: 2025-05-26
versions: 0.83.3.dev versions: 0.83.3.dev
seconds_per_case: 50.4 seconds_per_case: 12.2
total_cost: 7.6091 total_cost: 1.1354
- dirname: 2025-05-25-22-58-44--flash25-05-20-24k-think - dirname: 2025-05-25-22-58-44--flash25-05-20-24k-think
test_cases: 225 test_cases: 225