mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-01 10:14:59 +00:00
updated gpt-4-1106-preview leaderboards
This commit is contained in:
parent
eaa2514981
commit
4c6fd48b27
2 changed files with 33 additions and 30 deletions
|
@ -231,27 +231,29 @@
|
||||||
versions: 0.18.2-dev
|
versions: 0.18.2-dev
|
||||||
seconds_per_case: 33.6
|
seconds_per_case: 33.6
|
||||||
total_cost: 17.4657
|
total_cost: 17.4657
|
||||||
- dirname: 2024-05-04-14-33-15--redo-gpt-4-1106-preview-udiff5
|
|
||||||
|
- dirname: 2024-05-08-21-16-03--may-gpt-4-1106-preview-udiff
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: gpt-4-1106-preview
|
model: gpt-4-1106-preview
|
||||||
edit_format: udiff
|
edit_format: udiff
|
||||||
commit_hash: 1981105-dirty
|
commit_hash: 87664dc
|
||||||
pass_rate_1: 57.1
|
pass_rate_1: 51.9
|
||||||
pass_rate_2: 63.2
|
pass_rate_2: 65.4
|
||||||
percent_cases_well_formed: 94.0
|
percent_cases_well_formed: 92.5
|
||||||
error_outputs: 24
|
error_outputs: 30
|
||||||
num_malformed_responses: 8
|
num_malformed_responses: 10
|
||||||
user_asks: 0
|
user_asks: 0
|
||||||
lazy_comments: 7
|
lazy_comments: 3
|
||||||
syntax_errors: 3
|
syntax_errors: 11
|
||||||
indentation_errors: 5
|
indentation_errors: 2
|
||||||
exhausted_context_windows: 0
|
exhausted_context_windows: 0
|
||||||
test_timeouts: 2
|
test_timeouts: 1
|
||||||
command: aider
|
command: aider
|
||||||
date: 2024-05-04
|
date: 2024-05-08
|
||||||
versions: 0.31.2-dev
|
versions: 0.33.1-dev
|
||||||
seconds_per_case: 15.6
|
seconds_per_case: 20.4
|
||||||
total_cost: 5.9468
|
total_cost: 6.6061
|
||||||
|
|
||||||
- dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples
|
- dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: gpt-4-turbo-2024-04-09
|
model: gpt-4-turbo-2024-04-09
|
||||||
|
|
|
@ -78,24 +78,25 @@
|
||||||
versions: 0.22.1-dev
|
versions: 0.22.1-dev
|
||||||
seconds_per_case: 70.4
|
seconds_per_case: 70.4
|
||||||
total_cost: 43.3437
|
total_cost: 43.3437
|
||||||
- dirname: 2024-01-25-21-27-47--jan-gpt-4-1106-preview-udiff
|
|
||||||
|
- dirname: 2024-05-08-21-24-16--may-refac-gpt-4-1106-preview
|
||||||
test_cases: 89
|
test_cases: 89
|
||||||
model: gpt-4-1106-preview
|
model: gpt-4-1106-preview
|
||||||
edit_format: udiff
|
edit_format: udiff
|
||||||
commit_hash: a75e7c8
|
commit_hash: eaa2514-dirty
|
||||||
pass_rate_1: 57.3
|
pass_rate_1: 50.6
|
||||||
percent_cases_well_formed: 31.5
|
percent_cases_well_formed: 39.3
|
||||||
error_outputs: 127
|
error_outputs: 164
|
||||||
num_malformed_responses: 61
|
num_malformed_responses: 54
|
||||||
user_asks: 0
|
user_asks: 1
|
||||||
lazy_comments: 4
|
lazy_comments: 17
|
||||||
syntax_errors: 1
|
syntax_errors: 0
|
||||||
indentation_errors: 15
|
indentation_errors: 8
|
||||||
exhausted_context_windows: 1
|
exhausted_context_windows: 0
|
||||||
test_timeouts: 0
|
test_timeouts: 0
|
||||||
command: aider
|
command: aider
|
||||||
date: 2024-01-25
|
date: 2024-05-08
|
||||||
versions: 0.22.1-dev
|
versions: 0.33.1-dev
|
||||||
seconds_per_case: 181.9
|
seconds_per_case: 61.8
|
||||||
total_cost: 18.6347
|
total_cost: 18.3844
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue