From 6d2b9d6699ebec73ec2a664bba64567026eb5eec Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Wed, 15 May 2024 09:49:38 -0700 Subject: [PATCH] Added gpt-4-turbo-2024-04-09 (diff) to the leaderboard --- _data/edit_leaderboard.yml | 22 ++++++++++++++++++++++ _data/refactor_leaderboard.yml | 25 ++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/_data/edit_leaderboard.yml b/_data/edit_leaderboard.yml index dd0d5695e..691214ee8 100644 --- a/_data/edit_leaderboard.yml +++ b/_data/edit_leaderboard.yml @@ -426,3 +426,25 @@ seconds_per_case: 6.0 total_cost: 0.0000 +- dirname: 2024-04-12-22-18-20--gpt-4-turbo-2024-04-09-plain-diff + test_cases: 33 + model: gpt-4-turbo-2024-04-09 (diff) + edit_format: diff + commit_hash: 9b2e697-dirty + pass_rate_1: 48.5 + pass_rate_2: 57.6 + percent_cases_well_formed: 100.0 + error_outputs: 15 + num_malformed_responses: 0 + user_asks: 15 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 0 + command: aider --model gpt-4-turbo-2024-04-09 + date: 2024-04-12 + versions: 0.28.1-dev + seconds_per_case: 17.6 + total_cost: 1.6205 + \ No newline at end of file diff --git a/_data/refactor_leaderboard.yml b/_data/refactor_leaderboard.yml index 8b3816a1d..22d1fb666 100644 --- a/_data/refactor_leaderboard.yml +++ b/_data/refactor_leaderboard.yml @@ -120,4 +120,27 @@ date: 2024-05-13 versions: 0.34.1-dev seconds_per_case: 27.8 - total_cost: 0.0000 \ No newline at end of file + total_cost: 0.0000 + +- dirname: 2024-04-10-13-26-18--refac-gpt-4-turbo-2024-04-09-diff + test_cases: 88 + model: gpt-4-turbo-2024-04-09 (diff) + edit_format: diff + commit_hash: 7875418 + pass_rate_1: 21.4 + percent_cases_well_formed: 6.8 + error_outputs: 247 + num_malformed_responses: 82 + user_asks: 1 + lazy_comments: 2 + syntax_errors: 3 + indentation_errors: 8 + exhausted_context_windows: 0 + test_timeouts: 0 + command: aider --model gpt-4-turbo-2024-04-09 + date: 2024-04-10 + versions: 0.28.1-dev + seconds_per_case: 67.8 + total_cost: 20.4889 + + \ No newline at end of file