mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-06 04:35:00 +00:00
chore: Update polyglot leaderboard test results and metadata
This commit is contained in:
parent
5b6c186125
commit
a777f336e1
1 changed files with 17 additions and 17 deletions
|
@ -24,31 +24,31 @@
|
||||||
seconds_per_case: 17.3
|
seconds_per_case: 17.3
|
||||||
total_cost: 0.3236
|
total_cost: 0.3236
|
||||||
|
|
||||||
- dirname: 2024-12-21-18-44-28--polyglot-sonnet
|
- dirname: 2025-01-17-19-44-33--sonnet-baseline-jan-17
|
||||||
test_cases: 225
|
test_cases: 225
|
||||||
model: claude-3-5-sonnet-20241022
|
model: claude-3-5-sonnet-20241022
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: a755079-dirty
|
commit_hash: 6451d59
|
||||||
pass_rate_1: 18.7
|
pass_rate_1: 22.2
|
||||||
pass_rate_2: 45.3
|
pass_rate_2: 51.6
|
||||||
pass_num_1: 42
|
pass_num_1: 50
|
||||||
pass_num_2: 102
|
pass_num_2: 116
|
||||||
percent_cases_well_formed: 100.0
|
percent_cases_well_formed: 99.6
|
||||||
error_outputs: 1
|
error_outputs: 2
|
||||||
num_malformed_responses: 0
|
num_malformed_responses: 1
|
||||||
num_with_malformed_responses: 0
|
num_with_malformed_responses: 1
|
||||||
user_asks: 14
|
user_asks: 11
|
||||||
lazy_comments: 0
|
lazy_comments: 0
|
||||||
syntax_errors: 0
|
syntax_errors: 0
|
||||||
indentation_errors: 0
|
indentation_errors: 0
|
||||||
exhausted_context_windows: 1
|
exhausted_context_windows: 1
|
||||||
test_timeouts: 12
|
test_timeouts: 8
|
||||||
total_tests: 225
|
total_tests: 225
|
||||||
command: aider --model claude-3-5-sonnet-20241022
|
command: aider --model claude-3-5-sonnet-20241022
|
||||||
date: 2024-12-21
|
date: 2025-01-17
|
||||||
versions: 0.69.2.dev
|
versions: 0.71.2.dev
|
||||||
seconds_per_case: 30.8
|
seconds_per_case: 21.4
|
||||||
total_cost: 13.4847
|
total_cost: 14.4063
|
||||||
|
|
||||||
- dirname: 2024-12-30-20-57-12--gpt-4o-2024-11-20-ex-as-sys
|
- dirname: 2024-12-30-20-57-12--gpt-4o-2024-11-20-ex-as-sys
|
||||||
test_cases: 225
|
test_cases: 225
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue