mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-22 04:24:59 +00:00
186 lines
No EOL
4.5 KiB
YAML
186 lines
No EOL
4.5 KiB
YAML
- dirname: 2024-07-18-18-57-46--gpt-4o-mini-whole
|
|
test_cases: 133
|
|
model: gpt-4o-mini (whole)
|
|
edit_format: whole
|
|
commit_hash: d31eef3-dirty
|
|
pass_rate_1: 40.6
|
|
pass_rate_2: 55.6
|
|
released: 2024-07-18
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 1
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 1
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model gpt-4o-mini
|
|
date: 2024-07-18
|
|
versions: 0.44.1-dev
|
|
seconds_per_case: 7.8
|
|
total_cost: 0.0916
|
|
|
|
- dirname: 2024-07-04-14-32-08--claude-3.5-sonnet-diff-continue
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet (diff)
|
|
edit_format: diff
|
|
commit_hash: 35f21b5
|
|
pass_rate_1: 57.1
|
|
pass_rate_2: 77.4
|
|
percent_cases_well_formed: 99.2
|
|
error_outputs: 23
|
|
released: 2024-06-20
|
|
num_malformed_responses: 4
|
|
num_with_malformed_responses: 1
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --sonnet
|
|
date: 2024-07-04
|
|
versions: 0.42.1-dev
|
|
seconds_per_case: 17.6
|
|
total_cost: 3.6346
|
|
|
|
- dirname: 2024-08-06-18-28-39--gpt-4o-2024-08-06-diff-again
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06 (diff)
|
|
edit_format: diff
|
|
commit_hash: ed9ed89
|
|
pass_rate_1: 57.1
|
|
pass_rate_2: 71.4
|
|
percent_cases_well_formed: 98.5
|
|
error_outputs: 18
|
|
num_malformed_responses: 2
|
|
num_with_malformed_responses: 2
|
|
user_asks: 10
|
|
lazy_comments: 0
|
|
syntax_errors: 6
|
|
indentation_errors: 2
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 5
|
|
released: 2024-08-06
|
|
command: aider --model openai/gpt-4o-2024-08-06
|
|
date: 2024-08-06
|
|
versions: 0.48.1-dev
|
|
seconds_per_case: 6.5
|
|
total_cost: 0.0000
|
|
|
|
- dirname: 2024-09-12-19-57-35--o1-mini-whole
|
|
test_cases: 133
|
|
model: o1-mini (whole)
|
|
edit_format: whole
|
|
commit_hash: 36fa773-dirty, 291b456
|
|
pass_rate_1: 49.6
|
|
pass_rate_2: 70.7
|
|
percent_cases_well_formed: 90.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 17
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model o1-mini
|
|
date: 2024-09-12
|
|
versions: 0.56.1.dev
|
|
seconds_per_case: 103.0
|
|
total_cost: 5.3725
|
|
|
|
- dirname: 2024-09-12-20-56-22--o1-mini-diff
|
|
test_cases: 133
|
|
model: o1-mini (diff)
|
|
edit_format: diff
|
|
commit_hash: 4598a37-dirty, 291b456, 752e823-dirty
|
|
pass_rate_1: 45.1
|
|
pass_rate_2: 62.4
|
|
percent_cases_well_formed: 85.7
|
|
error_outputs: 26
|
|
num_malformed_responses: 26
|
|
num_with_malformed_responses: 19
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model o1-mini --edit-format diff
|
|
date: 2024-09-12
|
|
versions: 0.56.1.dev
|
|
seconds_per_case: 177.7
|
|
total_cost: 11.1071
|
|
|
|
- dirname: 2024-09-05-21-26-49--sonnet-whole-sep5
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet (whole)
|
|
edit_format: whole
|
|
commit_hash: 8cfdcbd
|
|
pass_rate_1: 55.6
|
|
pass_rate_2: 75.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model openrouter/anthropic/claude-3.5-sonnet --edit-format whole
|
|
date: 2024-09-05
|
|
versions: 0.55.1.dev
|
|
seconds_per_case: 15.2
|
|
total_cost: 2.3502
|
|
|
|
- dirname: 2024-09-12-22-44-14--o1-preview-diff
|
|
test_cases: 133
|
|
model: o1-preview (diff)
|
|
edit_format: diff
|
|
commit_hash: 72f52bd
|
|
pass_rate_1: 56.4
|
|
pass_rate_2: 75.2
|
|
percent_cases_well_formed: 84.2
|
|
error_outputs: 27
|
|
num_malformed_responses: 27
|
|
num_with_malformed_responses: 21
|
|
user_asks: 8
|
|
lazy_comments: 0
|
|
syntax_errors: 7
|
|
indentation_errors: 3
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 3
|
|
command: aider --model o1-preview
|
|
date: 2024-09-12
|
|
versions: 0.56.1.dev
|
|
seconds_per_case: 95.8
|
|
total_cost: 71.7927
|
|
|
|
- dirname: 2024-09-13-02-13-59--o1-preview-whole
|
|
test_cases: 133
|
|
model: o1-preview (whole)
|
|
edit_format: whole
|
|
commit_hash: 72f52bd-dirty
|
|
pass_rate_1: 58.6
|
|
pass_rate_2: 79.7
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model o1-preview
|
|
date: 2024-09-13
|
|
versions: 0.56.1.dev
|
|
seconds_per_case: 47.4
|
|
total_cost: 38.0612 |