From ecca73780340c90a5f843092e33da235f0e1bb8a Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 7 May 2024 06:26:39 -0700 Subject: [PATCH] added deepseek-chat v2 --- _data/edit_leaderboard.yml | 21 +++++++++++++++++++++ benchmark/benchmark.py | 1 - docs/llms.md | 4 ++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/_data/edit_leaderboard.yml b/_data/edit_leaderboard.yml index 51ec96298..cc2281a70 100644 --- a/_data/edit_leaderboard.yml +++ b/_data/edit_leaderboard.yml @@ -313,4 +313,25 @@ versions: 0.31.2-dev seconds_per_case: 22.9 total_cost: 2.7494 +- dirname: 2024-05-07-12-55-06--deepseek-chat-v2-whole + test_cases: 133 + model: deepseek-chat v2 + edit_format: whole + commit_hash: b1cae73, db994fb + pass_rate_1: 50.4 + pass_rate_2: 60.2 + percent_cases_well_formed: 100.0 + error_outputs: 3 + num_malformed_responses: 0 + user_asks: 3 + lazy_comments: 13 + syntax_errors: 0 + indentation_errors: 2 + exhausted_context_windows: 0 + test_timeouts: 1 + command: aider --model openai/deepseek-chat + date: 2024-05-07 + versions: 0.31.2-dev + seconds_per_case: 42.4 + total_cost: 0.0000 \ No newline at end of file diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index db56637dd..a0bc7a694 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -293,7 +293,6 @@ def load_results(dirname): def summarize_results(dirname): all_results = load_results(dirname) - dump(len(all_results)) res = SimpleNamespace() res.total_tests = len(list(Path(dirname).glob("*"))) diff --git a/docs/llms.md b/docs/llms.md index 9c377a77f..0293da4c1 100644 --- a/docs/llms.md +++ b/docs/llms.md @@ -15,7 +15,7 @@ Aider works with a number of **free** API providers: - Google's [Gemini 1.5 Pro](#gemini) is the most capable free model to use with aider, with code editing capabilities similar to GPT-3.5. - You can use [Llama 3 70B on Groq](#groq) which is comparable to GPT-3.5 in code editing performance. 
-- The [Deepseek Coder](#deepseek) model works well with aider, comparable to GPT-3.5. Deepseek.com currently offers 5M free tokens of API usage. +- The [Deepseek Chat v2](#deepseek) model works well with aider, better than GPT-3.5. Deepseek.com currently offers 5M free tokens of API usage. - Cohere also offers free API access to their [Command-R+ model](#cohere), which works with aider as a *very basic* coding assistant. ## Local models @@ -294,7 +294,7 @@ export OPENAI_API_BASE=https://api.deepseek.com/v1 setx OPENAI_API_KEY setx OPENAI_API_BASE https://api.deepseek.com/v1 -aider --model openai/deepseek-coder +aider --model openai/deepseek-chat ``` See the [model warnings](#model-warnings)