mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-16 01:25:00 +00:00
298 lines
No EOL
7.3 KiB
YAML
298 lines
No EOL
7.3 KiB
YAML
- dirname: 2024-05-04-23-27-02--refac-gemini
|
|
test_cases: 89
|
|
model: gemini/gemini-1.5-pro-latest
|
|
edit_format: diff-fenced
|
|
commit_hash: a0649ba-dirty, 425cb29, 1b35ca2-dirty, 3e4fca2-dirty
|
|
pass_rate_1: 49.4
|
|
percent_cases_well_formed: 7.9
|
|
error_outputs: 247
|
|
num_malformed_responses: 82
|
|
user_asks: 0
|
|
lazy_comments: 4
|
|
syntax_errors: 0
|
|
indentation_errors: 8
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gemini/gemini-1.5-pro-latest
|
|
date: 2024-05-04
|
|
versions: 0.31.2-dev
|
|
seconds_per_case: 55.7
|
|
total_cost: 0.0000
|
|
- dirname: 2024-05-04-17-45-53--refac-opus
|
|
test_cases: 83
|
|
model: claude-3-opus-20240229
|
|
edit_format: diff
|
|
commit_hash: b02320b-dirty
|
|
pass_rate_1: 72.3
|
|
percent_cases_well_formed: 79.5
|
|
error_outputs: 51
|
|
num_malformed_responses: 17
|
|
user_asks: 0
|
|
lazy_comments: 2
|
|
syntax_errors: 1
|
|
indentation_errors: 3
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --opus
|
|
date: 2024-05-04
|
|
versions: 0.31.2-dev
|
|
seconds_per_case: 67.8
|
|
total_cost: 27.9176
|
|
- dirname: 2024-04-09-21-49-54--refac-gpt-4-turbo-2024-04-09
|
|
test_cases: 88
|
|
model: gpt-4-turbo-2024-04-09 (udiff)
|
|
edit_format: udiff
|
|
commit_hash: b75fdb9
|
|
pass_rate_1: 34.1
|
|
percent_cases_well_formed: 30.7
|
|
error_outputs: 183
|
|
num_malformed_responses: 61
|
|
user_asks: 0
|
|
lazy_comments: 1
|
|
syntax_errors: 3
|
|
indentation_errors: 15
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --gpt-4-turbo
|
|
date: 2024-04-09
|
|
versions: 0.27.1-dev
|
|
seconds_per_case: 42.4
|
|
total_cost: 19.6556
|
|
|
|
- dirname: 2024-05-08-22-25-41--may-refac-gpt-4-0125-preview-ex-sys
|
|
test_cases: 89
|
|
model: gpt-4-0125-preview
|
|
edit_format: udiff
|
|
commit_hash: bf09bd3-dirty
|
|
pass_rate_1: 33.7
|
|
percent_cases_well_formed: 47.2
|
|
error_outputs: 142
|
|
num_malformed_responses: 47
|
|
user_asks: 0
|
|
lazy_comments: 1
|
|
syntax_errors: 2
|
|
indentation_errors: 16
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4-0125-preview
|
|
date: 2024-05-08
|
|
versions: 0.33.1-dev
|
|
seconds_per_case: 56.6
|
|
total_cost: 20.3270
|
|
|
|
- dirname: 2024-05-08-21-24-16--may-refac-gpt-4-1106-preview
|
|
test_cases: 89
|
|
model: gpt-4-1106-preview
|
|
edit_format: udiff
|
|
commit_hash: eaa2514-dirty
|
|
pass_rate_1: 50.6
|
|
percent_cases_well_formed: 39.3
|
|
error_outputs: 164
|
|
num_malformed_responses: 54
|
|
user_asks: 1
|
|
lazy_comments: 17
|
|
syntax_errors: 0
|
|
indentation_errors: 8
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4-1106-preview
|
|
date: 2024-05-08
|
|
versions: 0.33.1-dev
|
|
seconds_per_case: 61.8
|
|
total_cost: 18.3844
|
|
|
|
- dirname: 2024-05-13-17-42-22--refac-gpt-4o-diff
|
|
test_cases: 89
|
|
model: gpt-4o
|
|
edit_format: diff
|
|
commit_hash: b6cd852
|
|
pass_rate_1: 62.9
|
|
percent_cases_well_formed: 53.9
|
|
error_outputs: 9025
|
|
num_malformed_responses: 41
|
|
user_asks: 0
|
|
lazy_comments: 2
|
|
syntax_errors: 0
|
|
indentation_errors: 5
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider
|
|
date: 2024-05-13
|
|
versions: 0.34.1-dev
|
|
seconds_per_case: 27.8
|
|
total_cost: 0.0000
|
|
|
|
- dirname: 2024-04-10-13-26-18--refac-gpt-4-turbo-2024-04-09-diff
|
|
test_cases: 88
|
|
model: gpt-4-turbo-2024-04-09 (diff)
|
|
edit_format: diff
|
|
commit_hash: 7875418
|
|
pass_rate_1: 21.4
|
|
percent_cases_well_formed: 6.8
|
|
error_outputs: 247
|
|
num_malformed_responses: 82
|
|
user_asks: 1
|
|
lazy_comments: 2
|
|
syntax_errors: 3
|
|
indentation_errors: 8
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4-turbo-2024-04-09
|
|
date: 2024-04-10
|
|
versions: 0.28.1-dev
|
|
seconds_per_case: 67.8
|
|
total_cost: 20.4889
|
|
|
|
- dirname: 2024-07-01-18-30-33--refac-claude-3.5-sonnet-diff-not-lazy
|
|
test_cases: 89
|
|
model: claude-3.5-sonnet-20240620
|
|
edit_format: diff
|
|
commit_hash: 7396e38-dirty
|
|
pass_rate_1: 64.0
|
|
percent_cases_well_formed: 76.4
|
|
error_outputs: 176
|
|
num_malformed_responses: 39
|
|
num_with_malformed_responses: 21
|
|
user_asks: 11
|
|
lazy_comments: 2
|
|
syntax_errors: 4
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --sonnet
|
|
date: 2024-07-01
|
|
versions: 0.40.7-dev
|
|
seconds_per_case: 42.8
|
|
total_cost: 11.5242
|
|
|
|
- dirname: 2024-07-24-07-49-39--refac-deepseek-coder-v2-0724
|
|
test_cases: 89
|
|
model: DeepSeek Coder V2 0724 (deprecated)
|
|
edit_format: diff
|
|
commit_hash: bb6e597
|
|
pass_rate_1: 32.6
|
|
percent_cases_well_formed: 59.6
|
|
error_outputs: 487
|
|
num_malformed_responses: 113
|
|
num_with_malformed_responses: 36
|
|
user_asks: 10
|
|
lazy_comments: 2
|
|
syntax_errors: 1
|
|
indentation_errors: 12
|
|
exhausted_context_windows: 3
|
|
test_timeouts: 0
|
|
command: aider --model deepseek/deepseek-coder
|
|
date: 2024-07-24
|
|
versions: 0.45.2-dev
|
|
seconds_per_case: 85.0
|
|
total_cost: 0.4148
|
|
|
|
- dirname: 2024-08-06-18-44-03--refac-gpt-4o-2024-08-06-diff
|
|
test_cases: 89
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: diff
|
|
commit_hash: f388061
|
|
pass_rate_1: 49.4
|
|
percent_cases_well_formed: 89.9
|
|
error_outputs: 97
|
|
num_malformed_responses: 19
|
|
num_with_malformed_responses: 9
|
|
user_asks: 16
|
|
lazy_comments: 2
|
|
syntax_errors: 0
|
|
indentation_errors: 13
|
|
exhausted_context_windows: 2
|
|
test_timeouts: 0
|
|
command: aider --model openai/gpt-4o-2024-08-06
|
|
date: 2024-08-06
|
|
versions: 0.48.1-dev
|
|
seconds_per_case: 16.9
|
|
total_cost: 4.0873
|
|
|
|
- dirname: 2024-09-05-15-19-05--refac-deepseek-v2.5-no-shell
|
|
test_cases: 89
|
|
model: DeepSeek Chat V2.5
|
|
edit_format: diff
|
|
commit_hash: 1279c86, 1279c86-dirty
|
|
pass_rate_1: 31.5
|
|
percent_cases_well_formed: 67.4
|
|
error_outputs: 90
|
|
num_malformed_responses: 88
|
|
num_with_malformed_responses: 29
|
|
user_asks: 8
|
|
lazy_comments: 7
|
|
syntax_errors: 0
|
|
indentation_errors: 6
|
|
exhausted_context_windows: 2
|
|
test_timeouts: 0
|
|
command: aider --deepseek
|
|
date: 2024-09-05
|
|
versions: 0.55.1.dev
|
|
seconds_per_case: 225.4
|
|
total_cost: 1.0338
|
|
|
|
- dirname: 2024-10-22-19-57-27--refac-openrouter-sonnet-1022
|
|
test_cases: 89
|
|
model: claude-3-5-sonnet-20241022
|
|
edit_format: diff
|
|
commit_hash: 4a3e6ef
|
|
pass_rate_1: 92.1
|
|
percent_cases_well_formed: 91.0
|
|
error_outputs: 13
|
|
num_malformed_responses: 12
|
|
num_with_malformed_responses: 8
|
|
user_asks: 14
|
|
lazy_comments: 2
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --sonnet
|
|
date: 2024-10-22
|
|
versions: 0.60.1.dev
|
|
seconds_per_case: 32.5
|
|
total_cost: 8.4644
|
|
|
|
- dirname: 2024-10-22-20-03-10--refac-o1mini
|
|
test_cases: 89
|
|
model: o1-mini
|
|
edit_format: diff
|
|
commit_hash: 4a3e6ef-dirty
|
|
pass_rate_1: 44.9
|
|
percent_cases_well_formed: 29.2
|
|
error_outputs: 151
|
|
num_malformed_responses: 150
|
|
num_with_malformed_responses: 63
|
|
user_asks: 28
|
|
lazy_comments: 2
|
|
syntax_errors: 5
|
|
indentation_errors: 4
|
|
exhausted_context_windows: 1
|
|
test_timeouts: 0
|
|
command: aider --model o1-mini
|
|
date: 2024-10-22
|
|
versions: 0.60.1.dev
|
|
seconds_per_case: 115.3
|
|
total_cost: 29.0492
|
|
|
|
- dirname: 2024-10-22-20-26-36--refac-o1preview
|
|
test_cases: 89
|
|
model: o1-preview
|
|
edit_format: diff
|
|
commit_hash: 4a3e6ef-dirty
|
|
pass_rate_1: 75.3
|
|
percent_cases_well_formed: 57.3
|
|
error_outputs: 75
|
|
num_malformed_responses: 74
|
|
num_with_malformed_responses: 38
|
|
user_asks: 19
|
|
lazy_comments: 2
|
|
syntax_errors: 2
|
|
indentation_errors: 3
|
|
exhausted_context_windows: 1
|
|
test_timeouts: 0
|
|
command: aider --model o1-preview
|
|
date: 2024-10-22
|
|
versions: 0.60.1.dev
|
|
seconds_per_case: 231.7
|
|
total_cost: 120.9850 |