mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-03 03:05:00 +00:00
927 lines
No EOL
24 KiB
YAML
927 lines
No EOL
24 KiB
YAML
- dirname: 2024-08-15-13-17-11--json-no-lint-gpt-4o-2024-08-06-whole
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 4.3
|
|
total_cost: 0.7965
|
|
- dirname: 2024-08-15-13-18-36--json-no-lint-gpt-4o-2024-08-06-func
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 57.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 5.7
|
|
total_cost: 0.8417
|
|
- dirname: 2024-08-15-13-21-55--json-no-lint-gpt-4o-2024-05-13-func
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 2
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 7.1
|
|
total_cost: 1.2285
|
|
- dirname: 2024-08-15-13-23-33--json-no-lint-claude-3.5-sonnet-whole
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 10.5
|
|
total_cost: 1.6714
|
|
- dirname: 2024-08-15-13-26-38--json-no-lint-deepseek-coder-whole
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 59.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 2
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 27.9
|
|
total_cost: 0.0438
|
|
- dirname: 2024-08-15-13-50-03--json-no-lint-gpt-4o-2024-08-06-whole-2
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 61.7
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 4.2
|
|
total_cost: 0.7946
|
|
- dirname: 2024-08-15-13-51-36--json-no-lint-gpt-4o-2024-08-06-func-2
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 56.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 6.4
|
|
total_cost: 0.8390
|
|
- dirname: 2024-08-15-13-54-53--json-no-lint-gpt-4o-2024-05-13-func-2
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 7.7
|
|
total_cost: 1.2210
|
|
- dirname: 2024-08-15-13-56-21--json-no-lint-claude-3.5-sonnet-whole-2
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 1
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 16.5
|
|
total_cost: 1.6556
|
|
- dirname: 2024-08-15-14-06-12--json-no-lint-deepseek-coder-whole-2
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 2
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 1
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 25.8
|
|
total_cost: 0.0439
|
|
- dirname: 2024-08-15-14-11-45--json-no-lint-gpt-4o-2024-08-06-whole-3
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 4.3
|
|
total_cost: 0.7945
|
|
- dirname: 2024-08-15-14-13-11--json-no-lint-gpt-4o-2024-08-06-func-3
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 56.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 5.6
|
|
total_cost: 0.8220
|
|
- dirname: 2024-08-15-14-16-34--json-no-lint-gpt-4o-2024-05-13-func-3
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 58.6
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 8.7
|
|
total_cost: 1.2064
|
|
- dirname: 2024-08-15-14-17-51--json-no-lint-claude-3.5-sonnet-whole-3
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 11.0
|
|
total_cost: 1.6555
|
|
- dirname: 2024-08-15-14-21-06--json-no-lint-deepseek-coder-whole-3
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 61.7
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 3
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 3
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 24.4
|
|
total_cost: 0.0439
|
|
- dirname: 2024-08-15-14-27-17--json-no-lint-gpt-4o-2024-08-06-whole-4
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 4.3
|
|
total_cost: 0.8015
|
|
- dirname: 2024-08-15-14-28-58--json-no-lint-gpt-4o-2024-08-06-func-4
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 6.0
|
|
total_cost: 0.8394
|
|
- dirname: 2024-08-15-14-32-58--json-no-lint-gpt-4o-2024-05-13-func-4
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 59.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 2
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 11.1
|
|
total_cost: 1.2120
|
|
- dirname: 2024-08-15-14-34-39--json-no-lint-claude-3.5-sonnet-whole-4
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 11.3
|
|
total_cost: 1.6635
|
|
- dirname: 2024-08-15-14-38-35--json-no-lint-deepseek-coder-whole-4
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 59.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 2
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 24.5
|
|
total_cost: 0.0438
|
|
- dirname: 2024-08-15-14-44-11--json-no-lint-gpt-4o-2024-08-06-whole-5
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 4.6
|
|
total_cost: 0.8023
|
|
- dirname: 2024-08-15-14-45-40--json-no-lint-gpt-4o-2024-08-06-func-5
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 57.1
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 3
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 6.3
|
|
total_cost: 0.8354
|
|
- dirname: 2024-08-15-14-49-44--json-no-lint-gpt-4o-2024-05-13-func-5
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: JSON
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 59.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 4
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 10.5
|
|
total_cost: 1.2099
|
|
- dirname: 2024-08-15-14-51-18--json-no-lint-claude-3.5-sonnet-whole-5
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 11.4
|
|
total_cost: 1.6685
|
|
- dirname: 2024-08-15-14-54-41--json-no-lint-deepseek-coder-whole-5
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: Markdown
|
|
commit_hash: bac04a2
|
|
pass_rate_1: 61.7
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 2
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 2
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 24.5
|
|
total_cost: 0.0439
|
|
- dirname: 2024-08-15-15-12-55--json-no-lint-strict-gpt-4o-2024-08-06-func-2
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON (strict)
|
|
commit_hash: bf2d5fe
|
|
pass_rate_1: 57.1
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 5.9
|
|
total_cost: 0.8216
|
|
- dirname: 2024-08-15-15-14-31--json-no-lint-strict-gpt-4o-2024-08-06-func-3
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON (strict)
|
|
commit_hash: bf2d5fe
|
|
pass_rate_1: 54.1
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 2
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 6.3
|
|
total_cost: 0.8410
|
|
- dirname: 2024-08-15-15-16-14--json-no-lint-strict-gpt-4o-2024-08-06-func-4
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON (strict)
|
|
commit_hash: bf2d5fe
|
|
pass_rate_1: 59.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 5.9
|
|
total_cost: 0.8203
|
|
- dirname: 2024-08-15-15-17-50--json-no-lint-strict-gpt-4o-2024-08-06-func-5
|
|
test_cases: 133
|
|
model: gpt-4o-2024-08-06
|
|
edit_format: JSON (strict)
|
|
commit_hash: bf2d5fe
|
|
pass_rate_1: 57.1
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 1
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-08-06
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 6.1
|
|
total_cost: 0.8415
|
|
- dirname: 2024-08-15-17-36-22--json-no-lint-again-gpt-4o-2024-05-13-whole-1
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: Markdown
|
|
commit_hash: ed94379
|
|
pass_rate_1: 60.2
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 7
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 6.8
|
|
total_cost: 1.5110
|
|
- dirname: 2024-08-15-17-38-13--json-no-lint-again-gpt-4o-2024-05-13-whole-2
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: Markdown
|
|
commit_hash: ed94379
|
|
pass_rate_1: 60.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 7.0
|
|
total_cost: 1.4954
|
|
- dirname: 2024-08-15-17-40-10--json-no-lint-again-gpt-4o-2024-05-13-whole-3
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: Markdown
|
|
commit_hash: ed94379
|
|
pass_rate_1: 60.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 0
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 6.8
|
|
total_cost: 1.4999
|
|
- dirname: 2024-08-15-17-41-30--json-no-lint-again-gpt-4o-2024-05-13-whole-4
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: Markdown
|
|
commit_hash: ed94379
|
|
pass_rate_1: 58.6
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 7.4
|
|
total_cost: 1.4848
|
|
- dirname: 2024-08-15-17-43-12--json-no-lint-again-gpt-4o-2024-05-13-whole-5
|
|
test_cases: 133
|
|
model: gpt-4o-2024-05-13
|
|
edit_format: Markdown
|
|
commit_hash: ed94379
|
|
pass_rate_1: 59.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model gpt-4o-2024-05-13
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 7.6
|
|
total_cost: 1.4948
|
|
|
|
- dirname: 2024-08-15-19-35-32--json-no-lint-again-deepseek-coder-func-1
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: JSON
|
|
commit_hash: 3a2ac02-dirty
|
|
pass_rate_1: 50.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 2
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 17.8
|
|
total_cost: 0.0330
|
|
- dirname: 2024-08-15-19-37-50--json-no-lint-again-deepseek-coder-func-2
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 49.6
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 5
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 18.3
|
|
total_cost: 0.0336
|
|
- dirname: 2024-08-15-19-40-20--json-no-lint-again-deepseek-coder-func-3
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 48.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 1
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 5
|
|
indentation_errors: 1
|
|
exhausted_context_windows: 1
|
|
test_timeouts: 2
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 18.4
|
|
total_cost: 0.0337
|
|
- dirname: 2024-08-15-19-44-07--json-no-lint-again-deepseek-coder-func-4
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 53.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 2
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 17.6
|
|
total_cost: 0.0330
|
|
- dirname: 2024-08-15-19-46-48--json-no-lint-again-deepseek-coder-func-5
|
|
test_cases: 133
|
|
model: deepseek-coder V2 0724
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28-dirty
|
|
pass_rate_1: 53.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 11
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 2
|
|
command: aider --model deepseek-coder
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 18.0
|
|
total_cost: 0.0332
|
|
|
|
- dirname: 2024-08-15-20-07-59--json-no-lint-again-claude-3.5-sonnet-func-1
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 54.1
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 9.5
|
|
total_cost: 1.5789
|
|
- dirname: 2024-08-15-20-09-39--json-no-lint-again-claude-3.5-sonnet-func-2
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 55.6
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 9.2
|
|
total_cost: 1.5916
|
|
- dirname: 2024-08-15-20-11-39--json-no-lint-again-claude-3.5-sonnet-func-3
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 53.4
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 10.3
|
|
total_cost: 1.5896
|
|
- dirname: 2024-08-15-20-13-44--json-no-lint-again-claude-3.5-sonnet-func-4
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 55.6
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 9.2
|
|
total_cost: 1.6000
|
|
- dirname: 2024-08-15-20-15-51--json-no-lint-again-claude-3.5-sonnet-func-5
|
|
test_cases: 133
|
|
model: claude-3.5-sonnet
|
|
edit_format: JSON
|
|
commit_hash: 1a98c28
|
|
pass_rate_1: 51.9
|
|
percent_cases_well_formed: 100.0
|
|
error_outputs: 0
|
|
num_malformed_responses: 0
|
|
num_with_malformed_responses: 0
|
|
user_asks: 0
|
|
lazy_comments: 0
|
|
syntax_errors: 0
|
|
indentation_errors: 0
|
|
exhausted_context_windows: 0
|
|
test_timeouts: 1
|
|
command: aider --model claude-3.5-sonnet
|
|
date: 2024-08-15
|
|
versions: 0.50.2-dev
|
|
seconds_per_case: 8.9
|
|
total_cost: 1.5936
|
|
|