chore: Update polyglot leaderboard data for gemini-2.5-flash

This commit is contained in:
Paul Gauthier 2025-05-26 12:18:22 -07:00 committed by Paul Gauthier (aider)
parent acebc11237
commit 9c9eedd9c5

View file

@ -1421,34 +1421,35 @@
seconds_per_case: 44.1
total_cost: 65.7484
- dirname: 2025-05-25-22-42-53--flash25-05-20
- dirname: 2025-05-26-15-56-31--flash25-05-20-24k-think # dirname is misleading
test_cases: 225
model: gemini-2.5-flash-preview-05-20 (no think)
edit_format: diff
commit_hash: a8568c3
pass_rate_1: 23.6
pass_rate_2: 55.1
pass_num_1: 53
pass_num_2: 124
percent_cases_well_formed: 96.4
error_outputs: 8
num_malformed_responses: 8
num_with_malformed_responses: 8
user_asks: 96
commit_hash: 214b811-dirty
thinking_tokens: 0 # <-- no thinking
pass_rate_1: 20.9
pass_rate_2: 44.0
pass_num_1: 47
pass_num_2: 99
percent_cases_well_formed: 93.8
error_outputs: 16
num_malformed_responses: 16
num_with_malformed_responses: 14
user_asks: 79
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
prompt_tokens: 4512205
completion_tokens: 2991967
test_timeouts: 2
prompt_tokens: 5512458
completion_tokens: 514145
test_timeouts: 4
total_tests: 225
command: aider --model gemini/gemini-2.5-flash-preview-05-20
date: 2025-05-25
date: 2025-05-26
versions: 0.83.3.dev
seconds_per_case: 50.4
total_cost: 7.6091
seconds_per_case: 12.2
total_cost: 1.1354
- dirname: 2025-05-25-22-58-44--flash25-05-20-24k-think
test_cases: 225
model: gemini-2.5-flash-preview-05-20 (24k think)