From dda0a4e4abb7b83a05b2fe9c6a85ba6eb36dbfa0 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Mon, 17 Jun 2024 09:42:32 -0700 Subject: [PATCH] Added deepseek coder v2 --- website/_data/edit_leaderboard.yml | 32 +++++++++++++++--------------- website/docs/leaderboards/index.md | 16 +++++++++++++++ website/docs/llms.md | 1 + 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/website/_data/edit_leaderboard.yml b/website/_data/edit_leaderboard.yml index 5a4802184..379832540 100644 --- a/website/_data/edit_leaderboard.yml +++ b/website/_data/edit_leaderboard.yml @@ -44,28 +44,28 @@ seconds_per_case: 23.1 total_cost: 0.0000 -- dirname: 2024-04-29-19-17-28--deepseek-coder-whole - test_cases: 132 - model: deepseek-coder - released: 2024-01-25 +- dirname: 2024-06-17-14-45-54--deepseek-coder2-whole + test_cases: 133 + model: Deepseek Coder V2 edit_format: whole - commit_hash: c07f793-dirty - pass_rate_1: 47.0 - pass_rate_2: 54.5 + commit_hash: ca8672b + pass_rate_1: 63.9 + pass_rate_2: 75.2 percent_cases_well_formed: 100.0 - error_outputs: 0 + error_outputs: 1 num_malformed_responses: 0 - user_asks: 0 - lazy_comments: 2 - syntax_errors: 13 + num_with_malformed_responses: 0 + user_asks: 1 + lazy_comments: 0 + syntax_errors: 1 indentation_errors: 0 exhausted_context_windows: 0 - test_timeouts: 2 + test_timeouts: 7 command: aider --model deepseek/deepseek-coder - date: 2024-04-29 - versions: 0.30.2-dev - seconds_per_case: 26.7 - total_cost: 0.0000 + date: 2024-06-17 + versions: 0.38.1-dev + seconds_per_case: 21.1 + total_cost: 0.0537 - dirname: 2024-05-03-20-47-24--gemini-1.5-pro-diff-fenced test_cases: 133 diff --git a/website/docs/leaderboards/index.md b/website/docs/leaderboards/index.md index 8d0f189e7..e137d7927 100644 --- a/website/docs/leaderboards/index.md +++ b/website/docs/leaderboards/index.md @@ -16,6 +16,22 @@ While [aider can connect to almost any LLM](/docs/llms.html), it works best with models that score well on the benchmarks. 
+## DeepSeek Coder V2 takes #1 + +The new +[DeepSeek Coder V2](https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct) +model is now atop aider's code editing leaderboard! + +It's worth noting that it is only capable of using the "whole" edit format, +which means it returns a modified full copy of each file when it makes changes. +Most other strong models are able to use aider's "diff" editing format, +which allows them to return diffs of edits -- saving time and token costs. +Also, models that use the "whole" edit format can only edit files +that fit within their output token limits. +These output limits are often as low as 4k tokens, even for models +with very large context windows. + + ## Code editing leaderboard [Aider's code editing benchmark](/docs/benchmarks.html#the-benchmark) asks the LLM to edit python source files to complete 133 small coding exercises. This benchmark measures the LLM's coding ability, but also whether it can consistently emit code edits in the format specified in the system prompt. diff --git a/website/docs/llms.md b/website/docs/llms.md index e2cdf7ba6..44291f423 100644 --- a/website/docs/llms.md +++ b/website/docs/llms.md @@ -10,6 +10,7 @@ description: Aider can connect to most LLMs for AI pair programming. [![connecting to many LLMs](/assets/llms.jpg)](https://aider.chat/assets/llms.jpg) + ## Best models {: .no_toc }