From e5e07f9507ad42d9d40e49b4dd7b15fc75bd1634 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 20 Jun 2024 08:29:16 -0700 Subject: [PATCH] copy --- website/_data/edit_leaderboard.yml | 4 ++-- website/docs/leaderboards/index.md | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/website/_data/edit_leaderboard.yml b/website/_data/edit_leaderboard.yml index 240e017fd..1b5bf476d 100644 --- a/website/_data/edit_leaderboard.yml +++ b/website/_data/edit_leaderboard.yml @@ -630,7 +630,7 @@ indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 0 - command: aider --model openrouter/anthropic/claude-3.5-sonnet + command: aider --model openrouter/anthropic/claude-3.5-sonnet --edit-format whole date: 2024-06-20 versions: 0.38.1-dev seconds_per_case: 15.4 @@ -638,7 +638,7 @@ - dirname: 2024-06-20-15-16-41--claude-3.5-sonnet-diff test_cases: 133 - model: openrouter/anthropic/claude-3.5-sonnet + model: claude-3.5-sonnet (diff) edit_format: diff commit_hash: 068609e-dirty pass_rate_1: 57.9 diff --git a/website/docs/leaderboards/index.md b/website/docs/leaderboards/index.md index bc6d72c12..9993c665e 100644 --- a/website/docs/leaderboards/index.md +++ b/website/docs/leaderboards/index.md @@ -19,13 +19,14 @@ it works best with models that score well on the benchmarks. ## Claude 3.5 Sonnet takes the top spot Claude 3.5 Sonnet is now the top ranked model on aider's code editing leaderboard. -DeepSeek Coder V2 previously took the #1 spot, only 4 days ago. +DeepSeek Coder V2 took the #1 spot only 4 days ago. Sonnet ranked #1 when using the "whole" editing format, but it also scored very well with aider's "diff" editing format. This format allows it to return code changes as diffs -- saving time and token costs, and making it practical to work with larger source files. +As such, aider uses "diff" by default with this new Sonnet model. ## Code editing leaderboard