mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-05 12:14:59 +00:00
added DeepSeek Chat V2 0628 to leaderboard
This commit is contained in:
parent
fa121cef8e
commit
5ae96231ad
2 changed files with 25 additions and 24 deletions
|
@ -317,31 +317,9 @@
|
||||||
seconds_per_case: 22.9
|
seconds_per_case: 22.9
|
||||||
total_cost: 2.7494
|
total_cost: 2.7494
|
||||||
|
|
||||||
- dirname: 2024-05-07-12-55-06--deepseek-chat-v2-whole
|
|
||||||
test_cases: 133
|
|
||||||
model: deepseek-chat v2 (whole)
|
|
||||||
edit_format: whole
|
|
||||||
commit_hash: b1cae73, db994fb
|
|
||||||
pass_rate_1: 50.4
|
|
||||||
pass_rate_2: 60.2
|
|
||||||
percent_cases_well_formed: 100.0
|
|
||||||
error_outputs: 3
|
|
||||||
num_malformed_responses: 0
|
|
||||||
user_asks: 3
|
|
||||||
lazy_comments: 13
|
|
||||||
syntax_errors: 0
|
|
||||||
indentation_errors: 2
|
|
||||||
exhausted_context_windows: 0
|
|
||||||
test_timeouts: 1
|
|
||||||
command: aider --model deepseek/deepseek-chat --edit-format whole
|
|
||||||
date: 2024-05-07
|
|
||||||
versions: 0.31.2-dev
|
|
||||||
seconds_per_case: 42.4
|
|
||||||
total_cost: 0.0000
|
|
||||||
|
|
||||||
- dirname: 2024-05-09-18-57-52--deepseek-chat-v2-diff-reverted-and-helpful-assistant2
|
- dirname: 2024-05-09-18-57-52--deepseek-chat-v2-diff-reverted-and-helpful-assistant2
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: deepseek-chat v2 (diff)
|
model: DeepSeek Chat V2 (original)
|
||||||
released: 2024-05-06
|
released: 2024-05-06
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: 80a3f6d
|
commit_hash: 80a3f6d
|
||||||
|
@ -728,3 +706,26 @@
|
||||||
seconds_per_case: 7.8
|
seconds_per_case: 7.8
|
||||||
total_cost: 0.0916
|
total_cost: 0.0916
|
||||||
|
|
||||||
|
- dirname: 2024-07-19-08-57-13--openrouter-deepseek-chat-v2-0628
|
||||||
|
test_cases: 133
|
||||||
|
model: DeepSeek Chat V2 0628
|
||||||
|
edit_format: diff
|
||||||
|
commit_hash: 96ff06e-dirty
|
||||||
|
pass_rate_1: 60.9
|
||||||
|
pass_rate_2: 69.9
|
||||||
|
percent_cases_well_formed: 97.7
|
||||||
|
error_outputs: 58
|
||||||
|
num_malformed_responses: 13
|
||||||
|
num_with_malformed_responses: 3
|
||||||
|
user_asks: 2
|
||||||
|
lazy_comments: 0
|
||||||
|
syntax_errors: 0
|
||||||
|
indentation_errors: 0
|
||||||
|
exhausted_context_windows: 0
|
||||||
|
test_timeouts: 2
|
||||||
|
command: aider --model deepseek/deepseek-chat
|
||||||
|
date: 2024-07-19
|
||||||
|
versions: 0.45.2-dev
|
||||||
|
seconds_per_case: 37.1
|
||||||
|
total_cost: 0.0000
|
||||||
|
|
|
@ -95,7 +95,7 @@ Pair program with AI.
|
||||||
|
|
||||||
## Top tier performance
|
## Top tier performance
|
||||||
|
|
||||||
[Aider has the one of the top scores on SWE Bench](https://aider.chat/2024/06/02/main-swe-bench.html).
|
[Aider has one of the top scores on SWE Bench](https://aider.chat/2024/06/02/main-swe-bench.html).
|
||||||
SWE Bench is a challenging software engineering benchmark where aider
|
SWE Bench is a challenging software engineering benchmark where aider
|
||||||
solved *real* GitHub issues from popular open source
|
solved *real* GitHub issues from popular open source
|
||||||
projects like django, scikitlearn, matplotlib, etc.
|
projects like django, scikitlearn, matplotlib, etc.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue