mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-11 15:14:59 +00:00
moved website/ -> aider/website/
This commit is contained in:
parent
eb80b32915
commit
22a494bb59
155 changed files with 9 additions and 9 deletions
684
aider/website/_data/edit_leaderboard.yml
Normal file
684
aider/website/_data/edit_leaderboard.yml
Normal file
|
@ -0,0 +1,684 @@
|
|||
- dirname: 2024-05-01-20-05-59--direct-opus-filenames-outside-fence
|
||||
test_cases: 133
|
||||
model: claude-3-opus-20240229
|
||||
released: 2024-02-29
|
||||
edit_format: diff
|
||||
commit_hash: f4b1797-dirty, f4b1797
|
||||
pass_rate_1: 53.4
|
||||
pass_rate_2: 68.4
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 2
|
||||
num_malformed_responses: 0
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 1
|
||||
command: aider --opus
|
||||
date: 2024-05-01
|
||||
versions: 0.30.2-dev
|
||||
seconds_per_case: 32.4
|
||||
total_cost: 13.8395
|
||||
|
||||
- dirname: 2024-03-06-16-42-00--claude3-sonnet-whole
|
||||
test_cases: 133
|
||||
model: claude-3-sonnet-20240229
|
||||
released: 2024-02-29
|
||||
edit_format: whole
|
||||
commit_hash: a5f8076-dirty
|
||||
pass_rate_1: 43.6
|
||||
pass_rate_2: 54.9
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
user_asks: 1
|
||||
lazy_comments: 1
|
||||
syntax_errors: 2
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 7
|
||||
command: aider --sonnet
|
||||
date: 2024-03-06
|
||||
versions: 0.25.1-dev
|
||||
seconds_per_case: 23.1
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-05-03-20-47-24--gemini-1.5-pro-diff-fenced
|
||||
test_cases: 133
|
||||
model: gemini-1.5-pro-latest
|
||||
edit_format: diff-fenced
|
||||
commit_hash: 3a48dfb, 5d32dd7
|
||||
pass_rate_1: 45.9
|
||||
pass_rate_2: 57.1
|
||||
percent_cases_well_formed: 87.2
|
||||
error_outputs: 60
|
||||
num_malformed_responses: 17
|
||||
user_asks: 3
|
||||
lazy_comments: 0
|
||||
syntax_errors: 8
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model gemini/gemini-1.5-pro-latest
|
||||
date: 2024-05-03
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 21.3
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-05-08-20-59-15--may-gpt-3.5-turbo-whole
|
||||
test_cases: 133
|
||||
model: gpt-3.5-turbo-0125
|
||||
released: 2024-01-25
|
||||
edit_format: whole
|
||||
commit_hash: 1d55f74
|
||||
pass_rate_1: 41.4
|
||||
pass_rate_2: 50.4
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 0
|
||||
num_malformed_responses: 0
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 3
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 4
|
||||
command: aider -3
|
||||
date: 2024-05-08
|
||||
versions: 0.33.1-dev
|
||||
seconds_per_case: 6.5
|
||||
total_cost: 0.5032
|
||||
|
||||
- dirname: 2023-11-06-21-23-59--gpt-3.5-turbo-0301
|
||||
test_cases: 133
|
||||
model: gpt-3.5-turbo-0301
|
||||
released: 2023-03-01
|
||||
edit_format: whole
|
||||
commit_hash: 44388db-dirty
|
||||
pass_rate_1: 50.4
|
||||
pass_rate_2: 57.9
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
user_asks: 1
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 8
|
||||
command: aider --model gpt-3.5-turbo-0301
|
||||
date: 2023-11-06
|
||||
versions: 0.16.4-dev
|
||||
seconds_per_case: 6.5
|
||||
total_cost: 0.4822
|
||||
|
||||
- dirname: 2023-11-07-02-41-07--gpt-3.5-turbo-0613
|
||||
test_cases: 133
|
||||
model: gpt-3.5-turbo-0613
|
||||
released: 2023-06-13
|
||||
edit_format: whole
|
||||
commit_hash: 93aa497-dirty
|
||||
pass_rate_1: 38.3
|
||||
pass_rate_2: 50.4
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
user_asks: 1
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 5
|
||||
command: aider --model gpt-3.5-turbo-0613
|
||||
date: 2023-11-07
|
||||
versions: 0.16.4-dev
|
||||
seconds_per_case: 18.0
|
||||
total_cost: 0.5366
|
||||
- dirname: 2024-04-30-21-40-51--litellm-gpt-3.5-turbo-1106-again
|
||||
test_cases: 132
|
||||
model: gpt-3.5-turbo-1106
|
||||
edit_format: whole
|
||||
commit_hash: 7b14d77
|
||||
pass_rate_1: 45.5
|
||||
pass_rate_2: 56.1
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
user_asks: 1
|
||||
lazy_comments: 0
|
||||
syntax_errors: 19
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model gpt-3.5-turbo-1106
|
||||
date: 2024-04-30
|
||||
versions: 0.30.2-dev
|
||||
seconds_per_case: 5.3
|
||||
total_cost: 0.3261
|
||||
|
||||
- dirname: 2024-01-25-23-37-15--jan-exercism-gpt-4-0125-preview-udiff
|
||||
test_cases: 133
|
||||
model: gpt-4-0125-preview
|
||||
released: 2024-01-25
|
||||
edit_format: udiff
|
||||
commit_hash: edcf9b1
|
||||
pass_rate_1: 55.6
|
||||
pass_rate_2: 66.2
|
||||
percent_cases_well_formed: 97.7
|
||||
error_outputs: 6
|
||||
num_malformed_responses: 3
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 3
|
||||
indentation_errors: 7
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 4
|
||||
command: aider --model gpt-4-0125-preview
|
||||
date: 2024-01-25
|
||||
versions: 0.22.1-dev
|
||||
seconds_per_case: 44.8
|
||||
total_cost: 14.6428
|
||||
|
||||
- dirname: 2024-05-04-15-07-30--redo-gpt-4-0314-diff-reminder-rules
|
||||
test_cases: 133
|
||||
model: gpt-4-0314
|
||||
released: 2023-03-14
|
||||
edit_format: diff
|
||||
commit_hash: 0d43468
|
||||
pass_rate_1: 50.4
|
||||
pass_rate_2: 66.2
|
||||
percent_cases_well_formed: 93.2
|
||||
error_outputs: 28
|
||||
num_malformed_responses: 9
|
||||
user_asks: 1
|
||||
lazy_comments: 3
|
||||
syntax_errors: 9
|
||||
indentation_errors: 7
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model gpt-4-0314
|
||||
date: 2024-05-04
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 19.8
|
||||
total_cost: 16.2689
|
||||
|
||||
- dirname: 2023-12-16-21-24-28--editblock-gpt-4-0613-actual-main
|
||||
test_cases: 133
|
||||
model: gpt-4-0613
|
||||
released: 2023-06-13
|
||||
edit_format: diff
|
||||
commit_hash: 3aa17c4
|
||||
pass_rate_1: 46.6
|
||||
pass_rate_2: 67.7
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 14
|
||||
num_malformed_responses: 0
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
command: aider -4
|
||||
date: 2023-12-16
|
||||
versions: 0.18.2-dev
|
||||
seconds_per_case: 33.6
|
||||
total_cost: 17.4657
|
||||
|
||||
- dirname: 2024-05-08-21-16-03--may-gpt-4-1106-preview-udiff
|
||||
test_cases: 133
|
||||
model: gpt-4-1106-preview
|
||||
released: 2023-11-06
|
||||
edit_format: udiff
|
||||
commit_hash: 87664dc
|
||||
pass_rate_1: 51.9
|
||||
pass_rate_2: 65.4
|
||||
percent_cases_well_formed: 92.5
|
||||
error_outputs: 30
|
||||
num_malformed_responses: 10
|
||||
user_asks: 0
|
||||
lazy_comments: 3
|
||||
syntax_errors: 11
|
||||
indentation_errors: 2
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 1
|
||||
command: aider --model gpt-4-1106-preview
|
||||
date: 2024-05-08
|
||||
versions: 0.33.1-dev
|
||||
seconds_per_case: 20.4
|
||||
total_cost: 6.6061
|
||||
|
||||
- dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples
|
||||
test_cases: 133
|
||||
model: gpt-4-turbo-2024-04-09 (udiff)
|
||||
released: 2024-04-09
|
||||
edit_format: udiff
|
||||
commit_hash: e610e5b-dirty
|
||||
pass_rate_1: 48.1
|
||||
pass_rate_2: 63.9
|
||||
percent_cases_well_formed: 97.0
|
||||
error_outputs: 12
|
||||
num_malformed_responses: 4
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 4
|
||||
indentation_errors: 2
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --gpt-4-turbo
|
||||
date: 2024-05-01
|
||||
versions: 0.30.2-dev
|
||||
seconds_per_case: 22.8
|
||||
total_cost: 6.3337
|
||||
|
||||
- dirname: 2024-05-03-22-24-48--openrouter--llama3-diff-examples-sys-msg
|
||||
test_cases: 132
|
||||
model: llama3-70b-8192
|
||||
released: 2024-04-18
|
||||
edit_format: diff
|
||||
commit_hash: b5bb453
|
||||
pass_rate_1: 38.6
|
||||
pass_rate_2: 49.2
|
||||
percent_cases_well_formed: 73.5
|
||||
error_outputs: 105
|
||||
num_malformed_responses: 35
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 1
|
||||
indentation_errors: 2
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model groq/llama3-70b-8192
|
||||
date: 2024-05-03
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 14.5
|
||||
total_cost: 0.4311
|
||||
|
||||
- dirname: 2024-05-06-18-31-08--command-r-plus-whole-final
|
||||
test_cases: 133
|
||||
model: command-r-plus
|
||||
released: 2024-04-04
|
||||
edit_format: whole
|
||||
commit_hash: fc3a43e-dirty
|
||||
pass_rate_1: 21.8
|
||||
pass_rate_2: 31.6
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 0
|
||||
num_malformed_responses: 0
|
||||
user_asks: 0
|
||||
lazy_comments: 1
|
||||
syntax_errors: 5
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 7
|
||||
command: aider --model command-r-plus
|
||||
date: 2024-05-06
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 22.9
|
||||
total_cost: 2.7494
|
||||
|
||||
- dirname: 2024-05-07-12-55-06--deepseek-chat-v2-whole
|
||||
test_cases: 133
|
||||
model: deepseek-chat v2 (whole)
|
||||
edit_format: whole
|
||||
commit_hash: b1cae73, db994fb
|
||||
pass_rate_1: 50.4
|
||||
pass_rate_2: 60.2
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 3
|
||||
num_malformed_responses: 0
|
||||
user_asks: 3
|
||||
lazy_comments: 13
|
||||
syntax_errors: 0
|
||||
indentation_errors: 2
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 1
|
||||
command: aider --model deepseek/deepseek-chat --edit-format whole
|
||||
date: 2024-05-07
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 42.4
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-05-09-18-57-52--deepseek-chat-v2-diff-reverted-and-helpful-assistant2
|
||||
test_cases: 133
|
||||
model: deepseek-chat v2 (diff)
|
||||
released: 2024-05-06
|
||||
edit_format: diff
|
||||
commit_hash: 80a3f6d
|
||||
pass_rate_1: 44.4
|
||||
pass_rate_2: 60.9
|
||||
percent_cases_well_formed: 97.0
|
||||
error_outputs: 14
|
||||
num_malformed_responses: 4
|
||||
user_asks: 2
|
||||
lazy_comments: 0
|
||||
syntax_errors: 13
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model deepseek/deepseek-chat
|
||||
date: 2024-05-09
|
||||
versions: 0.33.1-dev
|
||||
seconds_per_case: 86.8
|
||||
total_cost: 0.0941
|
||||
|
||||
- dirname: 2024-05-07-20-32-37--qwen1.5-110b-chat-whole
|
||||
test_cases: 133
|
||||
model: qwen1.5-110b-chat
|
||||
released: 2024-02-04
|
||||
edit_format: whole
|
||||
commit_hash: 70b1c0c
|
||||
pass_rate_1: 30.8
|
||||
pass_rate_2: 37.6
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 3
|
||||
num_malformed_responses: 0
|
||||
user_asks: 3
|
||||
lazy_comments: 20
|
||||
syntax_errors: 0
|
||||
indentation_errors: 6
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model together_ai/qwen/qwen1.5-110b-chat
|
||||
date: 2024-05-07
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 46.9
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-05-07-20-57-04--wizardlm-2-8x22b-whole
|
||||
test_cases: 133
|
||||
model: WizardLM-2 8x22B
|
||||
edit_format: whole
|
||||
commit_hash: 8e272bf, bbe8639
|
||||
pass_rate_1: 27.8
|
||||
pass_rate_2: 44.4
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 0
|
||||
num_malformed_responses: 0
|
||||
user_asks: 0
|
||||
lazy_comments: 1
|
||||
syntax_errors: 2
|
||||
indentation_errors: 2
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model openrouter/microsoft/wizardlm-2-8x22b
|
||||
date: 2024-05-07
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 36.6
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-05-13-17-39-05--gpt-4o-diff
|
||||
test_cases: 133
|
||||
model: gpt-4o
|
||||
released: 2024-05-13
|
||||
edit_format: diff
|
||||
commit_hash: b6cd852
|
||||
pass_rate_1: 60.2
|
||||
pass_rate_2: 72.9
|
||||
percent_cases_well_formed: 96.2
|
||||
error_outputs: 103
|
||||
num_malformed_responses: 5
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 2
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 1
|
||||
command: aider
|
||||
date: 2024-05-13
|
||||
versions: 0.34.1-dev
|
||||
seconds_per_case: 6.0
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-04-12-22-18-20--gpt-4-turbo-2024-04-09-plain-diff
|
||||
test_cases: 33
|
||||
model: gpt-4-turbo-2024-04-09 (diff)
|
||||
edit_format: diff
|
||||
commit_hash: 9b2e697-dirty
|
||||
pass_rate_1: 48.5
|
||||
pass_rate_2: 57.6
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 15
|
||||
num_malformed_responses: 0
|
||||
user_asks: 15
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model gpt-4-turbo-2024-04-09
|
||||
date: 2024-04-12
|
||||
versions: 0.28.1-dev
|
||||
seconds_per_case: 17.6
|
||||
total_cost: 1.6205
|
||||
|
||||
- dirname: 2024-06-08-22-37-55--qwen2-72b-instruct-whole
|
||||
test_cases: 133
|
||||
model: Qwen2 72B Instruct
|
||||
edit_format: whole
|
||||
commit_hash: 02c7335-dirty, 1a97498-dirty
|
||||
pass_rate_1: 44.4
|
||||
pass_rate_2: 55.6
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 3
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 3
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 1
|
||||
command: aider --model together_ai/qwen/Qwen2-72B-Instruct
|
||||
date: 2024-06-08
|
||||
versions: 0.37.1-dev
|
||||
seconds_per_case: 14.3
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-06-08-23-45-41--gemini-1.5-flash-latest-whole
|
||||
test_cases: 133
|
||||
model: gemini-1.5-flash-latest
|
||||
edit_format: whole
|
||||
commit_hash: 86ea47f-dirty
|
||||
pass_rate_1: 33.8
|
||||
pass_rate_2: 44.4
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 16
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 12
|
||||
lazy_comments: 0
|
||||
syntax_errors: 9
|
||||
indentation_errors: 1
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model gemini/gemini-1.5-flash-latest
|
||||
date: 2024-06-08
|
||||
versions: 0.37.1-dev
|
||||
seconds_per_case: 7.2
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-06-09-03-28-21--codestral-whole
|
||||
test_cases: 133
|
||||
model: codestral-2405
|
||||
edit_format: whole
|
||||
commit_hash: effc88a
|
||||
pass_rate_1: 35.3
|
||||
pass_rate_2: 51.1
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 4
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 4
|
||||
lazy_comments: 1
|
||||
syntax_errors: 0
|
||||
indentation_errors: 1
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 4
|
||||
command: aider --model mistral/codestral-2405
|
||||
date: 2024-06-09
|
||||
versions: 0.37.1-dev
|
||||
seconds_per_case: 7.5
|
||||
total_cost: 0.6805
|
||||
|
||||
- dirname: 2024-06-08-19-25-26--codeqwen:7b-chat-v1.5-q8_0-whole
|
||||
test_cases: 133
|
||||
model: codeqwen:7b-chat-v1.5-q8_0
|
||||
edit_format: whole
|
||||
commit_hash: be0520f-dirty
|
||||
pass_rate_1: 32.3
|
||||
pass_rate_2: 34.6
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 8
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 8
|
||||
lazy_comments: 0
|
||||
syntax_errors: 1
|
||||
indentation_errors: 2
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 1
|
||||
command: aider --model ollama/codeqwen:7b-chat-v1.5-q8_0
|
||||
date: 2024-06-08
|
||||
versions: 0.37.1-dev
|
||||
seconds_per_case: 15.6
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-06-08-16-12-31--codestral:22b-v0.1-q8_0-whole
|
||||
test_cases: 133
|
||||
model: codestral:22b-v0.1-q8_0
|
||||
edit_format: whole
|
||||
commit_hash: be0520f-dirty
|
||||
pass_rate_1: 35.3
|
||||
pass_rate_2: 48.1
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 8
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 8
|
||||
lazy_comments: 2
|
||||
syntax_errors: 0
|
||||
indentation_errors: 1
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model ollama/codestral:22b-v0.1-q8_0
|
||||
date: 2024-06-08
|
||||
versions: 0.37.1-dev
|
||||
seconds_per_case: 46.4
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-06-08-17-54-04--qwen2:72b-instruct-q8_0-whole
|
||||
test_cases: 133
|
||||
model: qwen2:72b-instruct-q8_0
|
||||
edit_format: whole
|
||||
commit_hash: 74e51d5-dirty
|
||||
pass_rate_1: 43.6
|
||||
pass_rate_2: 49.6
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 27
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 27
|
||||
lazy_comments: 0
|
||||
syntax_errors: 5
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model ollama/qwen2:72b-instruct-q8_0
|
||||
date: 2024-06-08
|
||||
versions: 0.37.1-dev
|
||||
seconds_per_case: 280.6
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-07-04-14-32-08--claude-3.5-sonnet-diff-continue
|
||||
test_cases: 133
|
||||
model: claude-3.5-sonnet
|
||||
edit_format: diff
|
||||
commit_hash: 35f21b5
|
||||
pass_rate_1: 57.1
|
||||
pass_rate_2: 77.4
|
||||
percent_cases_well_formed: 99.2
|
||||
error_outputs: 23
|
||||
num_malformed_responses: 4
|
||||
num_with_malformed_responses: 1
|
||||
user_asks: 2
|
||||
lazy_comments: 0
|
||||
syntax_errors: 1
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 1
|
||||
command: aider --sonnet
|
||||
date: 2024-07-04
|
||||
versions: 0.42.1-dev
|
||||
seconds_per_case: 17.6
|
||||
total_cost: 3.6346
|
||||
|
||||
- dirname: 2024-06-17-14-45-54--deepseek-coder2-whole
|
||||
test_cases: 133
|
||||
model: DeepSeek Coder V2 (whole)
|
||||
edit_format: whole
|
||||
commit_hash: ca8672b
|
||||
pass_rate_1: 63.9
|
||||
pass_rate_2: 75.2
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 1
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 1
|
||||
lazy_comments: 0
|
||||
syntax_errors: 1
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 7
|
||||
command: aider --model deepseek/deepseek-coder
|
||||
date: 2024-06-17
|
||||
versions: 0.38.1-dev
|
||||
seconds_per_case: 21.1
|
||||
total_cost: 0.0537
|
||||
|
||||
- dirname: 2024-06-21-15-29-08--deepseek-coder2-diff-again3
|
||||
test_cases: 133
|
||||
model: DeepSeek Coder V2 (diff)
|
||||
edit_format: diff
|
||||
commit_hash: 515ab3e
|
||||
pass_rate_1: 58.6
|
||||
pass_rate_2: 66.2
|
||||
percent_cases_well_formed: 98.5
|
||||
error_outputs: 23
|
||||
num_malformed_responses: 5
|
||||
num_with_malformed_responses: 2
|
||||
user_asks: 2
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 1
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
command: aider --model deepseek/deepseek-coder
|
||||
date: 2024-06-21
|
||||
versions: 0.39.1-dev
|
||||
seconds_per_case: 30.2
|
||||
total_cost: 0.0857
|
||||
|
||||
- dirname: 2024-07-01-21-41-48--haiku-whole
|
||||
test_cases: 133
|
||||
model: claude-3-haiku-20240307
|
||||
edit_format: whole
|
||||
commit_hash: 75f506d
|
||||
pass_rate_1: 40.6
|
||||
pass_rate_2: 47.4
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 6
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 0
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
command: aider --model claude-3-haiku-20240307
|
||||
date: 2024-07-01
|
||||
versions: 0.41.1-dev
|
||||
seconds_per_case: 7.1
|
||||
total_cost: 0.1946
|
||||
|
190
aider/website/_data/refactor_leaderboard.yml
Normal file
190
aider/website/_data/refactor_leaderboard.yml
Normal file
|
@ -0,0 +1,190 @@
|
|||
- dirname: 2024-05-04-23-27-02--refac-gemini
|
||||
test_cases: 89
|
||||
model: gemini/gemini-1.5-pro-latest
|
||||
edit_format: diff-fenced
|
||||
commit_hash: a0649ba-dirty, 425cb29, 1b35ca2-dirty, 3e4fca2-dirty
|
||||
pass_rate_1: 49.4
|
||||
percent_cases_well_formed: 7.9
|
||||
error_outputs: 247
|
||||
num_malformed_responses: 82
|
||||
user_asks: 0
|
||||
lazy_comments: 4
|
||||
syntax_errors: 0
|
||||
indentation_errors: 8
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model gemini/gemini-1.5-pro-latest
|
||||
date: 2024-05-04
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 55.7
|
||||
total_cost: 0.0000
|
||||
- dirname: 2024-05-04-17-45-53--refac-opus
|
||||
test_cases: 83
|
||||
model: claude-3-opus-20240229
|
||||
edit_format: diff
|
||||
commit_hash: b02320b-dirty
|
||||
pass_rate_1: 72.3
|
||||
percent_cases_well_formed: 79.5
|
||||
error_outputs: 51
|
||||
num_malformed_responses: 17
|
||||
user_asks: 0
|
||||
lazy_comments: 2
|
||||
syntax_errors: 1
|
||||
indentation_errors: 3
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --opus
|
||||
date: 2024-05-04
|
||||
versions: 0.31.2-dev
|
||||
seconds_per_case: 67.8
|
||||
total_cost: 27.9176
|
||||
- dirname: 2024-04-09-21-49-54--refac-gpt-4-turbo-2024-04-09
|
||||
test_cases: 88
|
||||
model: gpt-4-turbo-2024-04-09 (udiff)
|
||||
edit_format: udiff
|
||||
commit_hash: b75fdb9
|
||||
pass_rate_1: 34.1
|
||||
percent_cases_well_formed: 30.7
|
||||
error_outputs: 183
|
||||
num_malformed_responses: 61
|
||||
user_asks: 0
|
||||
lazy_comments: 1
|
||||
syntax_errors: 3
|
||||
indentation_errors: 15
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --gpt-4-turbo
|
||||
date: 2024-04-09
|
||||
versions: 0.27.1-dev
|
||||
seconds_per_case: 42.4
|
||||
total_cost: 19.6556
|
||||
|
||||
- dirname: 2024-05-08-22-25-41--may-refac-gpt-4-0125-preview-ex-sys
|
||||
test_cases: 89
|
||||
model: gpt-4-0125-preview
|
||||
edit_format: udiff
|
||||
commit_hash: bf09bd3-dirty
|
||||
pass_rate_1: 33.7
|
||||
percent_cases_well_formed: 47.2
|
||||
error_outputs: 142
|
||||
num_malformed_responses: 47
|
||||
user_asks: 0
|
||||
lazy_comments: 1
|
||||
syntax_errors: 2
|
||||
indentation_errors: 16
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model gpt-4-0125-preview
|
||||
date: 2024-05-08
|
||||
versions: 0.33.1-dev
|
||||
seconds_per_case: 56.6
|
||||
total_cost: 20.3270
|
||||
|
||||
- dirname: 2024-05-08-21-24-16--may-refac-gpt-4-1106-preview
|
||||
test_cases: 89
|
||||
model: gpt-4-1106-preview
|
||||
edit_format: udiff
|
||||
commit_hash: eaa2514-dirty
|
||||
pass_rate_1: 50.6
|
||||
percent_cases_well_formed: 39.3
|
||||
error_outputs: 164
|
||||
num_malformed_responses: 54
|
||||
user_asks: 1
|
||||
lazy_comments: 17
|
||||
syntax_errors: 0
|
||||
indentation_errors: 8
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model gpt-4-1106-preview
|
||||
date: 2024-05-08
|
||||
versions: 0.33.1-dev
|
||||
seconds_per_case: 61.8
|
||||
total_cost: 18.3844
|
||||
|
||||
- dirname: 2024-05-13-17-42-22--refac-gpt-4o-diff
|
||||
test_cases: 89
|
||||
model: gpt-4o
|
||||
edit_format: diff
|
||||
commit_hash: b6cd852
|
||||
pass_rate_1: 62.9
|
||||
percent_cases_well_formed: 53.9
|
||||
error_outputs: 9025
|
||||
num_malformed_responses: 41
|
||||
user_asks: 0
|
||||
lazy_comments: 2
|
||||
syntax_errors: 0
|
||||
indentation_errors: 5
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider
|
||||
date: 2024-05-13
|
||||
versions: 0.34.1-dev
|
||||
seconds_per_case: 27.8
|
||||
total_cost: 0.0000
|
||||
|
||||
- dirname: 2024-04-10-13-26-18--refac-gpt-4-turbo-2024-04-09-diff
|
||||
test_cases: 88
|
||||
model: gpt-4-turbo-2024-04-09 (diff)
|
||||
edit_format: diff
|
||||
commit_hash: 7875418
|
||||
pass_rate_1: 21.4
|
||||
percent_cases_well_formed: 6.8
|
||||
error_outputs: 247
|
||||
num_malformed_responses: 82
|
||||
user_asks: 1
|
||||
lazy_comments: 2
|
||||
syntax_errors: 3
|
||||
indentation_errors: 8
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --model gpt-4-turbo-2024-04-09
|
||||
date: 2024-04-10
|
||||
versions: 0.28.1-dev
|
||||
seconds_per_case: 67.8
|
||||
total_cost: 20.4889
|
||||
|
||||
- dirname: 2024-07-01-18-30-33--refac-claude-3.5-sonnet-diff-not-lazy
|
||||
test_cases: 89
|
||||
model: claude-3.5-sonnet (diff)
|
||||
edit_format: diff
|
||||
commit_hash: 7396e38-dirty
|
||||
pass_rate_1: 64.0
|
||||
percent_cases_well_formed: 76.4
|
||||
error_outputs: 176
|
||||
num_malformed_responses: 39
|
||||
num_with_malformed_responses: 21
|
||||
user_asks: 11
|
||||
lazy_comments: 2
|
||||
syntax_errors: 4
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 0
|
||||
command: aider --sonnet
|
||||
date: 2024-07-01
|
||||
versions: 0.40.7-dev
|
||||
seconds_per_case: 42.8
|
||||
total_cost: 11.5242
|
||||
|
||||
|
||||
- dirname: 2024-07-04-15-06-43--refac-deepseek-coder2-128k
|
||||
test_cases: 89
|
||||
model: DeepSeek Coder V2 (128k context)
|
||||
edit_format: diff
|
||||
commit_hash: 08868fd
|
||||
pass_rate_1: 38.2
|
||||
percent_cases_well_formed: 73.0
|
||||
error_outputs: 393
|
||||
num_malformed_responses: 89
|
||||
num_with_malformed_responses: 24
|
||||
user_asks: 4
|
||||
lazy_comments: 2
|
||||
syntax_errors: 1
|
||||
indentation_errors: 5
|
||||
exhausted_context_windows: 3
|
||||
test_timeouts: 0
|
||||
command: aider --model deepseek/deepseek-coder
|
||||
date: 2024-07-04
|
||||
versions: 0.42.1-dev
|
||||
seconds_per_case: 82.9
|
||||
total_cost: 0.2601
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue