mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-16 01:25:00 +00:00
copy
This commit is contained in:
parent
2ea5a98ee0
commit
ec44850646
4 changed files with 177 additions and 37 deletions
|
@ -2184,7 +2184,7 @@
|
|||
|
||||
- dirname: 2024-12-18-01-50-08--o1
|
||||
test_cases: 133
|
||||
model: openrouter/openai/o1
|
||||
model: o1
|
||||
edit_format: diff
|
||||
commit_hash: 074c636-dirty
|
||||
pass_rate_1: 65.4
|
||||
|
|
155
aider/website/_data/polyglot_leaderboard.yml
Normal file
155
aider/website/_data/polyglot_leaderboard.yml
Normal file
|
@ -0,0 +1,155 @@
|
|||
- dirname: 2024-12-21-18-41-18--polyglot-gpt-4o-mini
|
||||
test_cases: 225
|
||||
model: gpt-4o-mini-2024-07-18
|
||||
edit_format: whole
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 0.9
|
||||
pass_rate_2: 3.6
|
||||
pass_num_1: 2
|
||||
pass_num_2: 8
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 0
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 36
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
total_tests: 225
|
||||
command: aider --model gpt-4o-mini-2024-07-18
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 17.3
|
||||
total_cost: 0.3236
|
||||
|
||||
- dirname: 2024-12-21-18-44-28--polyglot-sonnet
|
||||
test_cases: 225
|
||||
model: claude-3-5-sonnet-20241022
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 18.7
|
||||
pass_rate_2: 45.3
|
||||
pass_num_1: 42
|
||||
pass_num_2: 102
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 14
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 12
|
||||
total_tests: 225
|
||||
command: aider --model claude-3-5-sonnet-20241022
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 30.8
|
||||
total_cost: 13.4847
|
||||
|
||||
- dirname: 2024-12-21-18-52-34--polyglot-gpt-4o-diff
|
||||
test_cases: 225
|
||||
model: gpt-4o-2024-11-20
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 4.9
|
||||
pass_rate_2: 15.1
|
||||
pass_num_1: 11
|
||||
pass_num_2: 34
|
||||
percent_cases_well_formed: 96.0
|
||||
error_outputs: 12
|
||||
num_malformed_responses: 11
|
||||
num_with_malformed_responses: 9
|
||||
user_asks: 34
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 19
|
||||
total_tests: 225
|
||||
command: aider --model gpt-4o-2024-11-20
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 22.2
|
||||
total_cost: 7.1835
|
||||
|
||||
- dirname: 2024-12-21-19-23-03--polyglot-o1-hard-diff
|
||||
test_cases: 224
|
||||
model: o1-2024-12-17
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 23.7
|
||||
pass_rate_2: 61.7
|
||||
pass_num_1: 53
|
||||
pass_num_2: 139
|
||||
percent_cases_well_formed: 91.5
|
||||
error_outputs: 25
|
||||
num_malformed_responses: 24
|
||||
num_with_malformed_responses: 19
|
||||
user_asks: 16
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: aider --model openrouter/openai/o1
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 133.2
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-12-21-20-56-21--polyglot-deepseek-diff
|
||||
test_cases: 225
|
||||
model: deepseek-chat
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 5.3
|
||||
pass_rate_2: 17.8
|
||||
pass_num_1: 12
|
||||
pass_num_2: 40
|
||||
percent_cases_well_formed: 92.9
|
||||
error_outputs: 42
|
||||
num_malformed_responses: 37
|
||||
num_with_malformed_responses: 16
|
||||
user_asks: 23
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 5
|
||||
test_timeouts: 5
|
||||
total_tests: 225
|
||||
command: aider --model deepseek/deepseek-chat
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 184.0
|
||||
total_cost: 0.5101
|
||||
|
||||
- dirname: 2024-12-21-21-46-27--polyglot-haiku-diff
|
||||
test_cases: 225
|
||||
model: claude-3-5-haiku-20241022
|
||||
edit_format: diff
|
||||
commit_hash: a755079-dirty
|
||||
pass_rate_1: 7.1
|
||||
pass_rate_2: 28.0
|
||||
pass_num_1: 16
|
||||
pass_num_2: 63
|
||||
percent_cases_well_formed: 91.1
|
||||
error_outputs: 31
|
||||
num_malformed_responses: 30
|
||||
num_with_malformed_responses: 20
|
||||
user_asks: 13
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 1
|
||||
test_timeouts: 9
|
||||
total_tests: 225
|
||||
command: aider --model claude-3-5-haiku-20241022
|
||||
date: 2024-12-21
|
||||
versions: 0.69.2.dev
|
||||
seconds_per_case: 31.8
|
||||
total_cost: 6.0583
|
Loading…
Add table
Add a link
Reference in a new issue