mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 16:54:59 +00:00
Added 3.5 sonnet
This commit is contained in:
parent
068609e4ef
commit
090e0cdcfe
3 changed files with 68 additions and 15 deletions
|
@ -178,6 +178,18 @@ MODEL_SETTINGS = [
|
||||||
"whole",
|
"whole",
|
||||||
weak_model_name="claude-3-haiku-20240307",
|
weak_model_name="claude-3-haiku-20240307",
|
||||||
),
|
),
|
||||||
|
ModelSettings(
|
||||||
|
"anthropic/claude-3.5-sonnet",
|
||||||
|
"diff",
|
||||||
|
weak_model_name="claude-3-haiku-20240307",
|
||||||
|
use_repo_map=True,
|
||||||
|
),
|
||||||
|
ModelSettings(
|
||||||
|
"openrouter/anthropic/claude-3.5-sonnet",
|
||||||
|
"diff",
|
||||||
|
weak_model_name="openrouter/anthropic/claude-3-haiku-20240307",
|
||||||
|
use_repo_map=True,
|
||||||
|
),
|
||||||
# Cohere
|
# Cohere
|
||||||
ModelSettings(
|
ModelSettings(
|
||||||
"command-r-plus",
|
"command-r-plus",
|
||||||
|
|
|
@ -611,4 +611,51 @@
|
||||||
date: 2024-06-08
|
date: 2024-06-08
|
||||||
versions: 0.37.1-dev
|
versions: 0.37.1-dev
|
||||||
seconds_per_case: 280.6
|
seconds_per_case: 280.6
|
||||||
total_cost: 0.0000
|
total_cost: 0.0000
|
||||||
|
|
||||||
|
- dirname: 2024-06-20-15-09-26--claude-3.5-sonnet-whole
|
||||||
|
test_cases: 133
|
||||||
|
model: claude-3.5-sonnet (whole)
|
||||||
|
edit_format: whole
|
||||||
|
commit_hash: 068609e
|
||||||
|
pass_rate_1: 61.7
|
||||||
|
pass_rate_2: 78.2
|
||||||
|
percent_cases_well_formed: 100.0
|
||||||
|
error_outputs: 4
|
||||||
|
num_malformed_responses: 0
|
||||||
|
num_with_malformed_responses: 0
|
||||||
|
user_asks: 2
|
||||||
|
lazy_comments: 0
|
||||||
|
syntax_errors: 0
|
||||||
|
indentation_errors: 0
|
||||||
|
exhausted_context_windows: 0
|
||||||
|
test_timeouts: 0
|
||||||
|
command: aider --model openrouter/anthropic/claude-3.5-sonnet --edit-format whole
|
||||||
|
date: 2024-06-20
|
||||||
|
versions: 0.38.1-dev
|
||||||
|
seconds_per_case: 15.4
|
||||||
|
total_cost: 0.0000
|
||||||
|
|
||||||
|
- dirname: 2024-06-20-15-16-41--claude-3.5-sonnet-diff
|
||||||
|
test_cases: 133
|
||||||
|
model: openrouter/anthropic/claude-3.5-sonnet
|
||||||
|
edit_format: diff
|
||||||
|
commit_hash: 068609e-dirty
|
||||||
|
pass_rate_1: 57.9
|
||||||
|
pass_rate_2: 74.4
|
||||||
|
percent_cases_well_formed: 97.0
|
||||||
|
error_outputs: 48
|
||||||
|
num_malformed_responses: 11
|
||||||
|
num_with_malformed_responses: 4
|
||||||
|
user_asks: 0
|
||||||
|
lazy_comments: 0
|
||||||
|
syntax_errors: 0
|
||||||
|
indentation_errors: 0
|
||||||
|
exhausted_context_windows: 0
|
||||||
|
test_timeouts: 1
|
||||||
|
command: aider --model openrouter/anthropic/claude-3.5-sonnet
|
||||||
|
date: 2024-06-20
|
||||||
|
versions: 0.38.1-dev
|
||||||
|
seconds_per_case: 21.6
|
||||||
|
total_cost: 0.0000
|
||||||
|
|
|
@ -16,22 +16,16 @@ While [aider can connect to almost any LLM](/docs/llms.html),
|
||||||
it works best with models that score well on the benchmarks.
|
it works best with models that score well on the benchmarks.
|
||||||
|
|
||||||
|
|
||||||
## DeepSeek Coder V2 beats GPT-4o, Opus
|
## Claude 3.5 Sonnet takes the top spot
|
||||||
|
|
||||||
The new
|
Claude 3.5 Sonnet is now the top ranked model on aider's code editing leaderboard.
|
||||||
[DeepSeek Coder V2](https://aider.chat/docs/llms/deepseek.html)
|
DeepSeek Coder V2 had taken the #1 spot only 4 days earlier.
|
||||||
model is now atop aider's code editing leaderboard!
|
|
||||||
|
|
||||||
It's worth noting that DeepSeek Coder V2 is only capable of using aider's "whole" edit format.
|
|
||||||
This means it returns a modified full copy of each file when it makes changes.
|
|
||||||
Most other strong models are able to use aider's "diff" editing format,
|
|
||||||
which allows them to return diffs of edits -- saving time and token costs.
|
|
||||||
|
|
||||||
Models which use the "whole" edit format can only edit files
|
|
||||||
which fit within their output token limits.
|
|
||||||
These output limits are often as low as 4k tokens, even for models
|
|
||||||
with very large context windows.
|
|
||||||
|
|
||||||
|
Sonnet ranked #1 when using the "whole" editing format,
|
||||||
|
but it also scored very well with
|
||||||
|
aider's "diff" editing format.
|
||||||
|
The "diff" format allows it to return code changes as diffs -- saving time and token costs,
|
||||||
|
and making it practical to work with larger source files.
|
||||||
|
|
||||||
## Code editing leaderboard
|
## Code editing leaderboard
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue