From 9b6dda88133c1a42fd3fc6ab68066e22668e35c1 Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Wed, 28 Aug 2024 08:51:55 +0100 Subject: [PATCH 1/2] docs: add benchmark results for new gemini experimental models --- aider/website/_data/edit_leaderboard.yml | 72 +++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/aider/website/_data/edit_leaderboard.yml b/aider/website/_data/edit_leaderboard.yml index 7aa2e1343..e0f5ad6fb 100644 --- a/aider/website/_data/edit_leaderboard.yml +++ b/aider/website/_data/edit_leaderboard.yml @@ -878,4 +878,74 @@ versions: 0.50.2-dev seconds_per_case: 26.3 total_cost: 3.6113 - \ No newline at end of file + +- dirname: 2024-08-28-07-10-50--gemini-1.5-pro-exp-0827-diff-fenced + test_cases: 133 + model: gemini/gemini-1.5-pro-exp-0827 + edit_format: diff-fenced + commit_hash: d8adc75 + pass_rate_1: 54.9 + pass_rate_2: 66.9 + percent_cases_well_formed: 94.7 + error_outputs: 112 + num_malformed_responses: 26 + num_with_malformed_responses: 7 + user_asks: 38 + lazy_comments: 0 + syntax_errors: 1 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 1 + command: aider --model gemini/gemini-1.5-pro-exp-0827 + date: 2024-08-28 + versions: 0.53.1-dev + seconds_per_case: 14.5 + total_cost: 0.0000 + +- dirname: 2024-08-27-19-20-19--gemini-1.5-flash-exp-0827 + test_cases: 133 + model: gemini/gemini-1.5-flash-exp-0827 + edit_format: whole + commit_hash: d8adc75 + pass_rate_1: 40.6 + pass_rate_2: 52.6 + percent_cases_well_formed: 100.0 + error_outputs: 1 + num_malformed_responses: 0 + num_with_malformed_responses: 0 + user_asks: 1 + lazy_comments: 3 + syntax_errors: 1 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 4 + command: aider --model gemini/gemini-1.5-flash-exp-0827 + date: 2024-08-27 + versions: 0.53.1-dev + seconds_per_case: 6.3 + total_cost: 0.0000 + +- dirname: 2024-08-27-19-42-05--gemini-1.5-flash-8b-exp-0827 + test_cases: 133 + model: gemini/gemini-1.5-flash-8b-exp-0827 + edit_format: whole + commit_hash: d8adc75 + pass_rate_1: 31.6 + pass_rate_2: 38.3 + percent_cases_well_formed: 100.0 + error_outputs: 12 + num_malformed_responses: 0 + num_with_malformed_responses: 0 + user_asks: 10 + lazy_comments: 250 + syntax_errors: 6 + indentation_errors: 1 + exhausted_context_windows: 0 + test_timeouts: 0 + command: aider --model gemini/gemini-1.5-flash-8b-exp-0827 + date: 2024-08-27 + versions: 0.53.1-dev + seconds_per_case: 7.2 + total_cost: 0.0000 + + From 7f8203f89cc6fb132f18a71a13722efd482d6dfe Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Wed, 28 Aug 2024 09:21:04 +0100 Subject: [PATCH 2/2] docs: match benchmark formatting --- aider/website/_data/edit_leaderboard.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/aider/website/_data/edit_leaderboard.yml b/aider/website/_data/edit_leaderboard.yml index e0f5ad6fb..59c105c88 100644 --- a/aider/website/_data/edit_leaderboard.yml +++ b/aider/website/_data/edit_leaderboard.yml @@ -881,7 +881,7 @@ - dirname: 2024-08-28-07-10-50--gemini-1.5-pro-exp-0827-diff-fenced test_cases: 133 - model: gemini/gemini-1.5-pro-exp-0827 + model: gemini-1.5-pro-exp-0827 edit_format: diff-fenced commit_hash: d8adc75 pass_rate_1: 54.9 @@ -904,7 +904,7 @@ - dirname: 2024-08-27-19-20-19--gemini-1.5-flash-exp-0827 test_cases: 133 - model: gemini/gemini-1.5-flash-exp-0827 + model: gemini-1.5-flash-exp-0827 edit_format: whole commit_hash: d8adc75 pass_rate_1: 40.6 @@ -927,7 +927,7 @@ - dirname: 2024-08-27-19-42-05--gemini-1.5-flash-8b-exp-0827 test_cases: 133 - model: gemini/gemini-1.5-flash-8b-exp-0827 + model: gemini-1.5-flash-8b-exp-0827 edit_format: whole commit_hash: d8adc75 pass_rate_1: 31.6 @@ -947,5 +947,3 @@ versions: 0.53.1-dev seconds_per_case: 7.2 total_cost: 0.0000 - -