mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-06 20:54:59 +00:00
chore: Update polyglot leaderboard data for gemini-2.5-flash
This commit is contained in:
parent
acebc11237
commit
9c9eedd9c5
1 changed files with 19 additions and 18 deletions
|
@ -1421,33 +1421,34 @@
|
||||||
seconds_per_case: 44.1
|
seconds_per_case: 44.1
|
||||||
total_cost: 65.7484
|
total_cost: 65.7484
|
||||||
|
|
||||||
- dirname: 2025-05-25-22-42-53--flash25-05-20
|
- dirname: 2025-05-26-15-56-31--flash25-05-20-24k-think # dirname is misleading
|
||||||
test_cases: 225
|
test_cases: 225
|
||||||
model: gemini-2.5-flash-preview-05-20 (no think)
|
model: gemini-2.5-flash-preview-05-20 (no think)
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: a8568c3
|
commit_hash: 214b811-dirty
|
||||||
pass_rate_1: 23.6
|
thinking_tokens: 0 # <-- no thinking
|
||||||
pass_rate_2: 55.1
|
pass_rate_1: 20.9
|
||||||
pass_num_1: 53
|
pass_rate_2: 44.0
|
||||||
pass_num_2: 124
|
pass_num_1: 47
|
||||||
percent_cases_well_formed: 96.4
|
pass_num_2: 99
|
||||||
error_outputs: 8
|
percent_cases_well_formed: 93.8
|
||||||
num_malformed_responses: 8
|
error_outputs: 16
|
||||||
num_with_malformed_responses: 8
|
num_malformed_responses: 16
|
||||||
user_asks: 96
|
num_with_malformed_responses: 14
|
||||||
|
user_asks: 79
|
||||||
lazy_comments: 0
|
lazy_comments: 0
|
||||||
syntax_errors: 0
|
syntax_errors: 0
|
||||||
indentation_errors: 0
|
indentation_errors: 0
|
||||||
exhausted_context_windows: 0
|
exhausted_context_windows: 0
|
||||||
prompt_tokens: 4512205
|
prompt_tokens: 5512458
|
||||||
completion_tokens: 2991967
|
completion_tokens: 514145
|
||||||
test_timeouts: 2
|
test_timeouts: 4
|
||||||
total_tests: 225
|
total_tests: 225
|
||||||
command: aider --model gemini/gemini-2.5-flash-preview-05-20
|
command: aider --model gemini/gemini-2.5-flash-preview-05-20
|
||||||
date: 2025-05-25
|
date: 2025-05-26
|
||||||
versions: 0.83.3.dev
|
versions: 0.83.3.dev
|
||||||
seconds_per_case: 50.4
|
seconds_per_case: 12.2
|
||||||
total_cost: 7.6091
|
total_cost: 1.1354
|
||||||
|
|
||||||
- dirname: 2025-05-25-22-58-44--flash25-05-20-24k-think
|
- dirname: 2025-05-25-22-58-44--flash25-05-20-24k-think
|
||||||
test_cases: 225
|
test_cases: 225
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue