From a777f336e1d88a91a436a6082565e68085eb4d33 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Fri, 17 Jan 2025 13:37:02 -0800 Subject: [PATCH] chore: Update polyglot leaderboard test results and metadata --- aider/website/_data/polyglot_leaderboard.yml | 34 ++++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/aider/website/_data/polyglot_leaderboard.yml b/aider/website/_data/polyglot_leaderboard.yml index 8b52c1dbd..eb2b97b19 100644 --- a/aider/website/_data/polyglot_leaderboard.yml +++ b/aider/website/_data/polyglot_leaderboard.yml @@ -24,32 +24,32 @@ seconds_per_case: 17.3 total_cost: 0.3236 -- dirname: 2024-12-21-18-44-28--polyglot-sonnet +- dirname: 2025-01-17-19-44-33--sonnet-baseline-jan-17 test_cases: 225 model: claude-3-5-sonnet-20241022 edit_format: diff - commit_hash: a755079-dirty - pass_rate_1: 18.7 - pass_rate_2: 45.3 - pass_num_1: 42 - pass_num_2: 102 - percent_cases_well_formed: 100.0 - error_outputs: 1 - num_malformed_responses: 0 - num_with_malformed_responses: 0 - user_asks: 14 + commit_hash: 6451d59 + pass_rate_1: 22.2 + pass_rate_2: 51.6 + pass_num_1: 50 + pass_num_2: 116 + percent_cases_well_formed: 99.6 + error_outputs: 2 + num_malformed_responses: 1 + num_with_malformed_responses: 1 + user_asks: 11 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 1 - test_timeouts: 12 + test_timeouts: 8 total_tests: 225 command: aider --model claude-3-5-sonnet-20241022 - date: 2024-12-21 - versions: 0.69.2.dev - seconds_per_case: 30.8 - total_cost: 13.4847 - + date: 2025-01-17 + versions: 0.71.2.dev + seconds_per_case: 21.4 + total_cost: 14.4063 + - dirname: 2024-12-30-20-57-12--gpt-4o-2024-11-20-ex-as-sys test_cases: 225 model: gpt-4o-2024-11-20