updated gpt-4-1106-preview leaderboards

This commit is contained in:
Paul Gauthier 2024-05-08 15:02:16 -07:00
parent eaa2514981
commit 4c6fd48b27
2 changed files with 33 additions and 30 deletions

View file

@ -231,27 +231,29 @@
versions: 0.18.2-dev versions: 0.18.2-dev
seconds_per_case: 33.6 seconds_per_case: 33.6
total_cost: 17.4657 total_cost: 17.4657
- dirname: 2024-05-04-14-33-15--redo-gpt-4-1106-preview-udiff5
- dirname: 2024-05-08-21-16-03--may-gpt-4-1106-preview-udiff
test_cases: 133 test_cases: 133
model: gpt-4-1106-preview model: gpt-4-1106-preview
edit_format: udiff edit_format: udiff
commit_hash: 1981105-dirty commit_hash: 87664dc
pass_rate_1: 57.1 pass_rate_1: 51.9
pass_rate_2: 63.2 pass_rate_2: 65.4
percent_cases_well_formed: 94.0 percent_cases_well_formed: 92.5
error_outputs: 24 error_outputs: 30
num_malformed_responses: 8 num_malformed_responses: 10
user_asks: 0 user_asks: 0
lazy_comments: 7 lazy_comments: 3
syntax_errors: 3 syntax_errors: 11
indentation_errors: 5 indentation_errors: 2
exhausted_context_windows: 0 exhausted_context_windows: 0
test_timeouts: 2 test_timeouts: 1
command: aider command: aider
date: 2024-05-04 date: 2024-05-08
versions: 0.31.2-dev versions: 0.33.1-dev
seconds_per_case: 15.6 seconds_per_case: 20.4
total_cost: 5.9468 total_cost: 6.6061
- dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples - dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples
test_cases: 133 test_cases: 133
model: gpt-4-turbo-2024-04-09 model: gpt-4-turbo-2024-04-09

View file

@ -78,24 +78,25 @@
versions: 0.22.1-dev versions: 0.22.1-dev
seconds_per_case: 70.4 seconds_per_case: 70.4
total_cost: 43.3437 total_cost: 43.3437
- dirname: 2024-01-25-21-27-47--jan-gpt-4-1106-preview-udiff
- dirname: 2024-05-08-21-24-16--may-refac-gpt-4-1106-preview
test_cases: 89 test_cases: 89
model: gpt-4-1106-preview model: gpt-4-1106-preview
edit_format: udiff edit_format: udiff
commit_hash: a75e7c8 commit_hash: eaa2514-dirty
pass_rate_1: 57.3 pass_rate_1: 50.6
percent_cases_well_formed: 31.5 percent_cases_well_formed: 39.3
error_outputs: 127 error_outputs: 164
num_malformed_responses: 61 num_malformed_responses: 54
user_asks: 0 user_asks: 1
lazy_comments: 4 lazy_comments: 17
syntax_errors: 1 syntax_errors: 0
indentation_errors: 15 indentation_errors: 8
exhausted_context_windows: 1 exhausted_context_windows: 0
test_timeouts: 0 test_timeouts: 0
command: aider command: aider
date: 2024-01-25 date: 2024-05-08
versions: 0.22.1-dev versions: 0.33.1-dev
seconds_per_case: 181.9 seconds_per_case: 61.8
total_cost: 18.6347 total_cost: 18.3844