mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-20 03:25:00 +00:00
492 lines
No EOL
12 KiB
YAML
492 lines
No EOL
12 KiB
YAML
- dirname: 2024-09-25-21-17-19--architect-sonnet-sonnet-diff
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
editor_model: claude-3.5-sonnet
|
|
editor_edit_format: diff
|
|
edit_format: architect
|
|
commit_hash: c18d6a8-dirty
|
|
pass_rate_1: 62.4
|
|
pass_rate_2: 80.5
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 3
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 183
|
|
lazy_comments: 6
|
|
syntax_errors: 9
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model openrouter/anthropic/claude-3.5-sonnet
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 25.1
|
|
total_cost: 4.9502
|
|
|
|
- dirname: 2024-07-04-14-32-08--claude-3.5-sonnet-diff-continue
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: diff
|
|
commit_hash: 35f21b5
|
|
pass_rate_1: 57.1
|
|
pass_rate_2: 77.4
|
|
percent_cases_well_formed: 99.2
|
|
error_outputs: 23
|
|
released: 2024-06-20
|
|
num_malformed_responses: 4
|
|
num_with_malformed_responses: 1
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --sonnet
|
|
date: 2024-07-04
|
|
versions: 0.42.1-dev
|
|
seconds_per_case: 17.6
|
|
total_cost: 3.6346
|
|
|
|
- dirname: 2024-09-25-21-25-01--architect-o1mini-4o-jr-diff
|
|
test_cases: 133
|
|
model: o1-mini
|
|
editor_model: gpt-4o
|
|
editor_edit_format: diff
|
|
edit_format: architect
|
|
commit_hash: 3f682ed-dirty, 25e833b
|
|
pass_rate_1: 51.1
|
|
pass_rate_2: 70.7
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 12
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 214
|
|
lazy_comments: 6
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model o1-mini
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 23.7
|
|
total_cost: 9.3158
|
|
|
|
- dirname: 2024-09-26-15-05-58--architect-o1mini-deep-jr-whole
|
|
test_cases: 133
|
|
model: o1-mini
|
|
edit_format: architect
|
|
commit_hash: 1676653-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: whole
|
|
pass_rate_1: 51.9
|
|
pass_rate_2: 71.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 199
|
|
lazy_comments: 11
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model o1-mini
|
|
date: 2024-09-26
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 48.2
|
|
total_cost: 5.6069
|
|
|
|
- dirname: 2024-09-25-21-33-40--architect-4o-4o-jr-diff
|
|
test_cases: 133
|
|
model: gpt-4o
|
|
editor_model: gpt-4o
|
|
editor_edit_format: diff
|
|
edit_format: architect
|
|
commit_hash: 9f3cd92
|
|
pass_rate_1: 56.4
|
|
pass_rate_2: 75.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 13
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 207
|
|
lazy_comments: 8
|
|
syntax_errors: 1
|
|
indentation_errors: 1
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 3
|
|
command: aider --model gpt-4o
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 18.2
|
|
total_cost: 6.0918
|
|
|
|
- dirname: 2024-09-21-16-45-11--o1-preview-flex-sr-markers
|
|
test_cases: 133
|
|
model: o1-preview
|
|
edit_format: diff
|
|
commit_hash: 5493654-dirty
|
|
pass_rate_1: 57.9
|
|
pass_rate_2: 79.7
|
|
percent_cases_well_formed: 93.2
|
|
error_outputs: 11
|
|
num_malformed_responses: 11
|
|
num_with_malformed_responses: 9
|
|
user_asks: 3
|
|
lazy_comments: 0
|
|
syntax_errors: 10
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model o1-preview
|
|
date: 2024-09-21
|
|
versions: 0.56.1.dev
|
|
seconds_per_case: 80.9
|
|
total_cost: 63.9190
|
|
|
|
- dirname: 2024-09-25-21-39-05--architect-o1preview-4o-jr-diff
|
|
test_cases: 133
|
|
model: o1-preview
|
|
editor_model: gpt-4o
|
|
editor_edit_format: diff
|
|
edit_format: architect
|
|
commit_hash: 9f3cd92
|
|
pass_rate_1: 63.2
|
|
pass_rate_2: 80.5
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 23
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 191
|
|
lazy_comments: 2
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 4
|
|
command: aider --model o1-preview
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 42.3
|
|
total_cost: 39.3766
|
|
|
|
- dirname: 2024-09-25-21-52-42--architect-o1preview-sonnet-jr-diff
|
|
test_cases: 133
|
|
model: o1-preview
|
|
editor_model: claude-3.5-sonnet
|
|
editor_edit_format: diff
|
|
edit_format: architect
|
|
commit_hash: 9f3cd92
|
|
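# editor_model: claude-3-5-sonnet  (duplicate key in this record; the earlier editor_model: claude-3.5-sonnet is kept, matching the spelling used by the other entries)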
|
|
pass_rate_1: 60.9
|
|
pass_rate_2: 82.7
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 1
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 180
|
|
lazy_comments: 3
|
|
syntax_errors: 9
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 3
|
|
command: aider --model o1-preview
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 44.9
|
|
total_cost: 37.6192
|
|
|
|
- dirname: 2024-09-21-16-40-56--o1-mini-flex-sr-markers
|
|
test_cases: 36
|
|
model: o1-mini
|
|
edit_format: diff
|
|
commit_hash: 5493654
|
|
pass_rate_1: 50.0
|
|
pass_rate_2: 61.1
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 3
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 1
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model o1-mini
|
|
date: 2024-09-21
|
|
versions: 0.56.1.dev
|
|
seconds_per_case: 26.7
|
|
total_cost: 2.4226
|
|
|
|
- dirname: 2024-09-25-23-12-14--architect-o1mini-deep-jr-diff
|
|
test_cases: 133
|
|
model: o1-mini
|
|
edit_format: architect
|
|
commit_hash: 9f3cd92-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: diff
|
|
pass_rate_1: 48.9
|
|
pass_rate_2: 69.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 1
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 202
|
|
lazy_comments: 12
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model o1-mini
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 52.2
|
|
total_cost: 5.7927
|
|
|
|
- dirname: 2024-09-25-23-18-16--architect-o1preview-deep-jr-diff
|
|
test_cases: 133
|
|
model: o1-preview
|
|
edit_format: architect
|
|
commit_hash: 9f3cd92-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: diff
|
|
pass_rate_1: 64.7
|
|
pass_rate_2: 80.5
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 5
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 180
|
|
lazy_comments: 2
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model o1-preview
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 73.2
|
|
total_cost: 35.7887
|
|
|
|
- dirname: 2024-09-25-23-30-36--architect-o1preview-deep-jr-whole
|
|
test_cases: 133
|
|
model: o1-preview
|
|
edit_format: architect
|
|
commit_hash: 9f3cd92-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: whole
|
|
pass_rate_1: 63.9
|
|
pass_rate_2: 85.0
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 181
|
|
lazy_comments: 12
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model o1-preview
|
|
date: 2024-09-25
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 67.4
|
|
total_cost: 35.3152
|
|
|
|
- dirname: 2024-09-26-15-15-17--architect-sonnet-deep-jr-whole
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: architect
|
|
commit_hash: bc1559f-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: whole
|
|
pass_rate_1: 61.7
|
|
pass_rate_2: 78.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 184
|
|
lazy_comments: 5
|
|
syntax_errors: 9
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 3
|
|
command: aider --model openrouter/anthropic/claude-3.5-sonnet
|
|
date: 2024-09-26
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 37.2
|
|
total_cost: 2.1510
|
|
|
|
- dirname: 2024-09-26-15-33-28--costs-gpt4o-diff
|
|
test_cases: 133
|
|
model: gpt-4o
|
|
edit_format: diff
|
|
commit_hash: 89aa385-dirty
|
|
pass_rate_1: 55.6
|
|
pass_rate_2: 71.4
|
|
percent_cases_well_formed: 97.7
|
|
error_outputs: 5
|
|
num_malformed_responses: 5
|
|
num_with_malformed_responses: 3
|
|
user_asks: 10
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 1
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4o
|
|
date: 2024-09-26
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 9.7
|
|
total_cost: 3.8088
|
|
|
|
- dirname: 2024-09-26-15-41-08--architect-4o-deep-jr-whole
|
|
test_cases: 133
|
|
model: gpt-4o
|
|
edit_format: architect
|
|
commit_hash: 89aa385-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: whole
|
|
pass_rate_1: 60.9
|
|
pass_rate_2: 73.7
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 187
|
|
lazy_comments: 12
|
|
syntax_errors: 5
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o
|
|
date: 2024-09-26
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 38.0
|
|
total_cost: 2.4737
|
|
|
|
- dirname: 2024-09-26-15-54-08--architect-4o-deep-jr-diff
|
|
test_cases: 133
|
|
model: gpt-4o
|
|
edit_format: architect
|
|
commit_hash: 89aa385-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: diff
|
|
pass_rate_1: 57.1
|
|
pass_rate_2: 74.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 4
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 192
|
|
lazy_comments: 6
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model gpt-4o
|
|
date: 2024-09-26
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 44.0
|
|
total_cost: 2.5498
|
|
|
|
- dirname: 2024-09-26-16-06-39--architect-sonnet-deep-jr-diff
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: architect
|
|
commit_hash: 89aa385-dirty
|
|
editor_model: deepseek
|
|
editor_edit_format: diff
|
|
pass_rate_1: 61.7
|
|
pass_rate_2: 78.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 2
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 184
|
|
lazy_comments: 2
|
|
syntax_errors: 9
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model openrouter/anthropic/claude-3.5-sonnet
|
|
date: 2024-09-26
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 43.2
|
|
total_cost: 2.1488
|
|
|
|
- dirname: 2024-09-27-18-15-32--architect-4omini-4omini
|
|
test_cases: 133
|
|
model: gpt-4o-mini
|
|
edit_format: architect
|
|
commit_hash: 0bd8058-dirty
|
|
editor_model: gpt-4o-mini
|
|
editor_edit_format: whole
|
|
pass_rate_1: 43.6
|
|
pass_rate_2: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 208
|
|
lazy_comments: 2
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 3
|
|
command: aider --model gpt-4o-mini
|
|
date: 2024-09-27
|
|
versions: 0.57.2.dev
|
|
seconds_per_case: 21.0
|
|
total_cost: 0.1527
|
|
|
|
- dirname: 2024-07-18-18-57-46--gpt-4o-mini-whole
|
|
test_cases: 133
|
|
model: gpt-4o-mini
|
|
edit_format: whole
|
|
commit_hash: d31eef3-dirty
|
|
pass_rate_1: 40.6
|
|
pass_rate_2: 55.6
|
|
released: 2024-07-18
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 1
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 1
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model gpt-4o-mini
|
|
date: 2024-07-18
|
|
versions: 0.44.1-dev
|
|
seconds_per_case: 7.8
|
|
total_cost: 0.0916
|
|
|
|
- dirname: 2024-09-29-22-35-36--architect-o1preview-o1mini-whole
|
|
test_cases: 133
|
|
model: o1-preview
|
|
edit_format: architect
|
|
commit_hash: 53ca83b
|
|
editor_model: o1-mini
|
|
editor_edit_format: whole
|
|
pass_rate_1: 65.4
|
|
pass_rate_2: 85.0
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 179
|
|
lazy_comments: 4
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model o1-preview
|
|
date: 2024-09-29
|
|
versions: 0.58.1.dev
|
|
seconds_per_case: 39.7
|
|
total_cost: 36.2078 |