mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-30 17:24:59 +00:00
updated models-over-time
This commit is contained in:
parent
370993cbed
commit
9b5a703307
4 changed files with 740 additions and 554 deletions
|
@ -274,7 +274,7 @@
|
||||||
- dirname: 2024-05-03-22-24-48--openrouter--llama3-diff-examples-sys-msg
|
- dirname: 2024-05-03-22-24-48--openrouter--llama3-diff-examples-sys-msg
|
||||||
test_cases: 132
|
test_cases: 132
|
||||||
model: llama3-70b-8192
|
model: llama3-70b-8192
|
||||||
released: 2024-04-18
|
_released: 2024-04-18
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: b5bb453
|
commit_hash: b5bb453
|
||||||
pass_rate_1: 38.6
|
pass_rate_1: 38.6
|
||||||
|
@ -297,7 +297,7 @@
|
||||||
- dirname: 2024-05-06-18-31-08--command-r-plus-whole-final
|
- dirname: 2024-05-06-18-31-08--command-r-plus-whole-final
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: command-r-plus
|
model: command-r-plus
|
||||||
released: 2024-04-04
|
_released: 2024-04-04
|
||||||
edit_format: whole
|
edit_format: whole
|
||||||
commit_hash: fc3a43e-dirty
|
commit_hash: fc3a43e-dirty
|
||||||
pass_rate_1: 21.8
|
pass_rate_1: 21.8
|
||||||
|
@ -671,7 +671,7 @@
|
||||||
commit_hash: f7ce78b-dirty
|
commit_hash: f7ce78b-dirty
|
||||||
pass_rate_1: 46.6
|
pass_rate_1: 46.6
|
||||||
pass_rate_2: 63.9
|
pass_rate_2: 63.9
|
||||||
released: 2024-07-23
|
_released: 2024-07-23
|
||||||
percent_cases_well_formed: 92.5
|
percent_cases_well_formed: 92.5
|
||||||
error_outputs: 84
|
error_outputs: 84
|
||||||
num_malformed_responses: 19
|
num_malformed_responses: 19
|
||||||
|
@ -691,6 +691,7 @@
|
||||||
- dirname: 2024-07-24-06-30-29--llama-405b-whole
|
- dirname: 2024-07-24-06-30-29--llama-405b-whole
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: llama-3.1-405b-instruct (whole)
|
model: llama-3.1-405b-instruct (whole)
|
||||||
|
released: 2024-07-23
|
||||||
edit_format: whole
|
edit_format: whole
|
||||||
commit_hash: a362dea-dirty
|
commit_hash: a362dea-dirty
|
||||||
pass_rate_1: 48.9
|
pass_rate_1: 48.9
|
||||||
|
@ -698,7 +699,6 @@
|
||||||
percent_cases_well_formed: 100.0
|
percent_cases_well_formed: 100.0
|
||||||
error_outputs: 0
|
error_outputs: 0
|
||||||
num_malformed_responses: 0
|
num_malformed_responses: 0
|
||||||
released: 2024-07-23
|
|
||||||
num_with_malformed_responses: 0
|
num_with_malformed_responses: 0
|
||||||
user_asks: 0
|
user_asks: 0
|
||||||
lazy_comments: 0
|
lazy_comments: 0
|
||||||
|
@ -770,7 +770,7 @@
|
||||||
percent_cases_well_formed: 100.0
|
percent_cases_well_formed: 100.0
|
||||||
error_outputs: 27
|
error_outputs: 27
|
||||||
num_malformed_responses: 0
|
num_malformed_responses: 0
|
||||||
released: 2024-07-23
|
_released: 2024-07-23
|
||||||
num_with_malformed_responses: 0
|
num_with_malformed_responses: 0
|
||||||
user_asks: 23
|
user_asks: 23
|
||||||
lazy_comments: 8
|
lazy_comments: 8
|
||||||
|
@ -796,7 +796,7 @@
|
||||||
num_malformed_responses: 0
|
num_malformed_responses: 0
|
||||||
num_with_malformed_responses: 0
|
num_with_malformed_responses: 0
|
||||||
user_asks: 0
|
user_asks: 0
|
||||||
released: 2024-07-23
|
_released: 2024-07-23
|
||||||
lazy_comments: 0
|
lazy_comments: 0
|
||||||
syntax_errors: 0
|
syntax_errors: 0
|
||||||
indentation_errors: 0
|
indentation_errors: 0
|
||||||
|
@ -946,7 +946,7 @@
|
||||||
versions: 0.54.13.dev
|
versions: 0.54.13.dev
|
||||||
seconds_per_case: 8.3
|
seconds_per_case: 8.3
|
||||||
total_cost: 0.0000
|
total_cost: 0.0000
|
||||||
released: 2024-09-04
|
_released: 2024-09-04
|
||||||
|
|
||||||
- dirname: 2024-09-04-16-17-33--yi-coder-9b-chat-q4_0-whole
|
- dirname: 2024-09-04-16-17-33--yi-coder-9b-chat-q4_0-whole
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
|
@ -973,6 +973,7 @@
|
||||||
|
|
||||||
- dirname: 2024-09-05-14-50-11--deepseek-sep5-no-shell
|
- dirname: 2024-09-05-14-50-11--deepseek-sep5-no-shell
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
|
released: 2024-09-05
|
||||||
model: DeepSeek V2.5
|
model: DeepSeek V2.5
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: 1279c86
|
commit_hash: 1279c86
|
||||||
|
@ -1112,6 +1113,7 @@
|
||||||
- dirname: 2024-09-21-16-45-11--o1-preview-flex-sr-markers
|
- dirname: 2024-09-21-16-45-11--o1-preview-flex-sr-markers
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: o1-preview
|
model: o1-preview
|
||||||
|
released: 2024-09-12
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: 5493654-dirty
|
commit_hash: 5493654-dirty
|
||||||
pass_rate_1: 57.9
|
pass_rate_1: 57.9
|
||||||
|
@ -1477,6 +1479,7 @@
|
||||||
- dirname: 2024-10-04-16-30-08--chatgpt-4o-latest-diff-oct4
|
- dirname: 2024-10-04-16-30-08--chatgpt-4o-latest-diff-oct4
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: openai/chatgpt-4o-latest
|
model: openai/chatgpt-4o-latest
|
||||||
|
released: 2024-10-04
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: af10953
|
commit_hash: af10953
|
||||||
pass_rate_1: 56.4
|
pass_rate_1: 56.4
|
||||||
|
@ -1592,6 +1595,7 @@
|
||||||
- dirname: 2024-10-22-17-45-28--sonnet-1022-diff-fixed-model-settings
|
- dirname: 2024-10-22-17-45-28--sonnet-1022-diff-fixed-model-settings
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: claude-3-5-sonnet-20241022
|
model: claude-3-5-sonnet-20241022
|
||||||
|
released: 2024-10-22
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: 3b14eb9
|
commit_hash: 3b14eb9
|
||||||
pass_rate_1: 69.2
|
pass_rate_1: 69.2
|
||||||
|
@ -1615,6 +1619,7 @@
|
||||||
- dirname: 2024-11-04-19-19-32--haiku35-diff-ex-as-sys-false
|
- dirname: 2024-11-04-19-19-32--haiku35-diff-ex-as-sys-false
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: claude-3-5-haiku-20241022
|
model: claude-3-5-haiku-20241022
|
||||||
|
released: 2024-10-22
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: 03bbdb0-dirty
|
commit_hash: 03bbdb0-dirty
|
||||||
pass_rate_1: 61.7
|
pass_rate_1: 61.7
|
||||||
|
@ -1773,32 +1778,10 @@
|
||||||
seconds_per_case: 18.3
|
seconds_per_case: 18.3
|
||||||
total_cost: 0.0000
|
total_cost: 0.0000
|
||||||
|
|
||||||
- dirname: 2024-11-09-10-57-11--Qwen2.5-Coder-32B-Instruct
|
|
||||||
test_cases: 133
|
|
||||||
model: Qwen2.5-Coder-32B-Instruct (whole)
|
|
||||||
edit_format: whole
|
|
||||||
commit_hash: ec9982a
|
|
||||||
pass_rate_1: 60.9
|
|
||||||
pass_rate_2: 73.7
|
|
||||||
percent_cases_well_formed: 100.0
|
|
||||||
error_outputs: 1
|
|
||||||
num_malformed_responses: 0
|
|
||||||
num_with_malformed_responses: 0
|
|
||||||
user_asks: 1
|
|
||||||
lazy_comments: 0
|
|
||||||
syntax_errors: 0
|
|
||||||
indentation_errors: 0
|
|
||||||
exhausted_context_windows: 1
|
|
||||||
test_timeouts: 1
|
|
||||||
command: aider --model openai/Qwen2.5-Coder-32B-Instruct
|
|
||||||
date: 2024-11-09
|
|
||||||
versions: 0.59.2.dev
|
|
||||||
seconds_per_case: 26.6
|
|
||||||
total_cost: 0.0000
|
|
||||||
|
|
||||||
- dirname: 2024-11-09-11-09-15--Qwen2.5-Coder-32B-Instruct
|
- dirname: 2024-11-09-11-09-15--Qwen2.5-Coder-32B-Instruct
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: Qwen2.5-Coder-32B-Instruct (diff)
|
model: Qwen2.5-Coder-32B-Instruct (diff)
|
||||||
|
released: 2024-11-12
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: ec9982a
|
commit_hash: ec9982a
|
||||||
pass_rate_1: 59.4
|
pass_rate_1: 59.4
|
||||||
|
@ -1822,6 +1805,7 @@
|
||||||
- dirname: 2024-11-20-14-57-11--mistral-2411-direct-diff
|
- dirname: 2024-11-20-14-57-11--mistral-2411-direct-diff
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: Mistral Large (2411)
|
model: Mistral Large (2411)
|
||||||
|
released: 2024-11-18
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: dba844c
|
commit_hash: dba844c
|
||||||
pass_rate_1: 46.6
|
pass_rate_1: 46.6
|
||||||
|
@ -1845,6 +1829,7 @@
|
||||||
- dirname: 2024-11-20-19-28-30--gpt-4o-2024-11-20
|
- dirname: 2024-11-20-19-28-30--gpt-4o-2024-11-20
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: gpt-4o-2024-11-20
|
model: gpt-4o-2024-11-20
|
||||||
|
released: 2024-11-20
|
||||||
edit_format: diff
|
edit_format: diff
|
||||||
commit_hash: 2ac0776-dirty
|
commit_hash: 2ac0776-dirty
|
||||||
pass_rate_1: 58.6
|
pass_rate_1: 58.6
|
||||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 86 KiB After Width: | Height: | Size: 155 KiB |
File diff suppressed because it is too large
Load diff
Before Width: | Height: | Size: 74 KiB After Width: | Height: | Size: 81 KiB |
|
@ -131,7 +131,7 @@ def plot_over_time(yaml_file):
|
||||||
alpha=0.75,
|
alpha=0.75,
|
||||||
xytext=(5, 5),
|
xytext=(5, 5),
|
||||||
textcoords="offset points",
|
textcoords="offset points",
|
||||||
rotation=45,
|
rotation=30,
|
||||||
)
|
)
|
||||||
|
|
||||||
ax.set_xlabel("Model release date", fontsize=18, color="#555")
|
ax.set_xlabel("Model release date", fontsize=18, color="#555")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue