diff --git a/_data/edit_leaderboard.yml b/_data/edit_leaderboard.yml index 64722b334..a08f596f7 100644 --- a/_data/edit_leaderboard.yml +++ b/_data/edit_leaderboard.yml @@ -273,7 +273,7 @@ - dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples test_cases: 133 - model: gpt-4-turbo-2024-04-09 + model: gpt-4-turbo-2024-04-09 (udiff) released: 2024-04-09 edit_format: udiff commit_hash: e610e5b-dirty @@ -454,3 +454,25 @@ seconds_per_case: 6.0 total_cost: 0.0000 +- dirname: 2024-04-12-22-18-20--gpt-4-turbo-2024-04-09-plain-diff + test_cases: 33 + model: gpt-4-turbo-2024-04-09 (diff) + edit_format: diff + commit_hash: 9b2e697-dirty + pass_rate_1: 48.5 + pass_rate_2: 57.6 + percent_cases_well_formed: 100.0 + error_outputs: 15 + num_malformed_responses: 0 + user_asks: 15 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 0 + command: aider --model gpt-4-turbo-2024-04-09 + date: 2024-04-12 + versions: 0.28.1-dev + seconds_per_case: 17.6 + total_cost: 1.6205 + \ No newline at end of file diff --git a/_data/refactor_leaderboard.yml b/_data/refactor_leaderboard.yml index 8b3816a1d..db4d3483f 100644 --- a/_data/refactor_leaderboard.yml +++ b/_data/refactor_leaderboard.yml @@ -40,7 +40,7 @@ total_cost: 27.9176 - dirname: 2024-04-09-21-49-54--refac-gpt-4-turbo-2024-04-09 test_cases: 88 - model: gpt-4-turbo-2024-04-09 + model: gpt-4-turbo-2024-04-09 (udiff) edit_format: udiff commit_hash: b75fdb9 pass_rate_1: 34.1 @@ -120,4 +120,27 @@ date: 2024-05-13 versions: 0.34.1-dev seconds_per_case: 27.8 - total_cost: 0.0000 \ No newline at end of file + total_cost: 0.0000 + +- dirname: 2024-04-10-13-26-18--refac-gpt-4-turbo-2024-04-09-diff + test_cases: 88 + model: gpt-4-turbo-2024-04-09 (diff) + edit_format: diff + commit_hash: 7875418 + pass_rate_1: 21.4 + percent_cases_well_formed: 6.8 + error_outputs: 247 + num_malformed_responses: 82 + user_asks: 1 + lazy_comments: 2 + syntax_errors: 3 + indentation_errors: 8 + exhausted_context_windows: 0 + test_timeouts: 0 + command: aider --model gpt-4-turbo-2024-04-09 + date: 2024-04-10 + versions: 0.28.1-dev + seconds_per_case: 67.8 + total_cost: 20.4889 + + \ No newline at end of file