From 559279c781dc0ac94ed7ea1b4bbd70ecf0304d94 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 20 Jun 2024 09:56:18 -0700 Subject: [PATCH] copy --- website/_data/refactor_leaderboard.yml | 23 ++++++++++++++++++++++- website/docs/leaderboards/index.md | 4 +++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/website/_data/refactor_leaderboard.yml b/website/_data/refactor_leaderboard.yml index db4d3483f..11773ac39 100644 --- a/website/_data/refactor_leaderboard.yml +++ b/website/_data/refactor_leaderboard.yml @@ -143,4 +143,25 @@ seconds_per_case: 67.8 total_cost: 20.4889 - \ No newline at end of file + +- dirname: 2024-06-20-16-39-18--refac-claude-3.5-sonnet-diff + test_cases: 89 + model: claude-3.5-sonnet (diff) + edit_format: diff + commit_hash: e5e07f9 + pass_rate_1: 55.1 + percent_cases_well_formed: 70.8 + error_outputs: 240 + num_malformed_responses: 54 + num_with_malformed_responses: 26 + user_asks: 10 + lazy_comments: 2 + syntax_errors: 0 + indentation_errors: 3 + exhausted_context_windows: 0 + test_timeouts: 0 + command: aider --model openrouter/anthropic/claude-3.5-sonnet + date: 2024-06-20 + versions: 0.38.1-dev + seconds_per_case: 51.9 + total_cost: 0.0000 \ No newline at end of file diff --git a/website/docs/leaderboards/index.md b/website/docs/leaderboards/index.md index 9993c665e..78d2abafc 100644 --- a/website/docs/leaderboards/index.md +++ b/website/docs/leaderboards/index.md @@ -19,7 +19,9 @@ it works best with models that score well on the benchmarks. ## Claude 3.5 Sonnet takes the top spot Claude 3.5 Sonnet is now the top ranked model on aider's code editing leaderboard. -DeepSeek Coder V2 took the #1 spot only 4 days ago. +DeepSeek Coder V2 only spent 4 days in the top spot. + +The new Sonnet came in 3rd on aider's refactoring leaderboard, behind GPT-4o and Opus. Sonnet ranked #1 when using the "whole" editing format, but it also scored very well with