diff --git a/aider/website/_data/edit_leaderboard.yml b/aider/website/_data/edit_leaderboard.yml index 9fccbf448..ad894ff5e 100644 --- a/aider/website/_data/edit_leaderboard.yml +++ b/aider/website/_data/edit_leaderboard.yml @@ -317,31 +317,9 @@ seconds_per_case: 22.9 total_cost: 2.7494 -- dirname: 2024-05-07-12-55-06--deepseek-chat-v2-whole - test_cases: 133 - model: deepseek-chat v2 (whole) - edit_format: whole - commit_hash: b1cae73, db994fb - pass_rate_1: 50.4 - pass_rate_2: 60.2 - percent_cases_well_formed: 100.0 - error_outputs: 3 - num_malformed_responses: 0 - user_asks: 3 - lazy_comments: 13 - syntax_errors: 0 - indentation_errors: 2 - exhausted_context_windows: 0 - test_timeouts: 1 - command: aider --model deepseek/deepseek-chat --edit-format whole - date: 2024-05-07 - versions: 0.31.2-dev - seconds_per_case: 42.4 - total_cost: 0.0000 - - dirname: 2024-05-09-18-57-52--deepseek-chat-v2-diff-reverted-and-helpful-assistant2 test_cases: 133 - model: deepseek-chat v2 (diff) + model: DeepSeek Chat V2 (original) released: 2024-05-06 edit_format: diff commit_hash: 80a3f6d @@ -727,4 +705,27 @@ versions: 0.44.1-dev seconds_per_case: 7.8 total_cost: 0.0916 + +- dirname: 2024-07-19-08-57-13--openrouter-deepseek-chat-v2-0628 + test_cases: 133 + model: DeepSeek Chat V2 0628 + edit_format: diff + commit_hash: 96ff06e-dirty + pass_rate_1: 60.9 + pass_rate_2: 69.9 + percent_cases_well_formed: 97.7 + error_outputs: 58 + num_malformed_responses: 13 + num_with_malformed_responses: 3 + user_asks: 2 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 2 + command: aider --model deepseek/deepseek-chat + date: 2024-07-19 + versions: 0.45.2-dev + seconds_per_case: 37.1 + total_cost: 0.0000 \ No newline at end of file diff --git a/aider/website/index.md b/aider/website/index.md index b6645a19b..c7e4f1a25 100644 --- a/aider/website/index.md +++ b/aider/website/index.md @@ -95,7 +95,7 @@ Pair program with AI. ## Top tier performance -[Aider has the one of the top scores on SWE Bench](https://aider.chat/2024/06/02/main-swe-bench.html). +[Aider has one of the top scores on SWE Bench](https://aider.chat/2024/06/02/main-swe-bench.html). SWE Bench is a challenging software engineering benchmark where aider solved *real* GitHub issues from popular open source projects like django, scikitlearn, matplotlib, etc.