This commit is contained in:
Paul Gauthier 2024-09-12 20:40:12 -07:00
parent d747a3781d
commit eba845ea51
3 changed files with 152 additions and 20 deletions

View file

@ -1132,4 +1132,49 @@
versions: 0.56.1.dev
seconds_per_case: 177.7
total_cost: 11.1071
- dirname: 2024-09-12-22-44-14--o1-preview-diff
test_cases: 133
model: o1-preview (diff)
edit_format: diff
commit_hash: 72f52bd
pass_rate_1: 56.4
pass_rate_2: 75.2
percent_cases_well_formed: 84.2
error_outputs: 27
num_malformed_responses: 27
num_with_malformed_responses: 21
user_asks: 8
lazy_comments: 0
syntax_errors: 7
indentation_errors: 3
exhausted_context_windows: 0
test_timeouts: 3
command: aider --model o1-preview
date: 2024-09-12
versions: 0.56.1.dev
seconds_per_case: 95.8
total_cost: 71.7927
- dirname: 2024-09-13-02-13-59--o1-preview-whole
test_cases: 133
model: o1-preview (whole)
edit_format: whole
commit_hash: 72f52bd-dirty
pass_rate_1: 58.6
pass_rate_2: 79.7
percent_cases_well_formed: 100.0
error_outputs: 0
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 2
lazy_comments: 0
syntax_errors: 1
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model o1-preview
date: 2024-09-13
versions: 0.56.1.dev
seconds_per_case: 47.4
total_cost: 38.0612

View file

@ -115,4 +115,72 @@
versions: 0.56.1.dev
seconds_per_case: 177.7
total_cost: 11.1071
- dirname: 2024-09-05-21-26-49--sonnet-whole-sep5
test_cases: 133
model: claude-3.5-sonnet (whole)
edit_format: whole
commit_hash: 8cfdcbd
pass_rate_1: 55.6
pass_rate_2: 75.2
percent_cases_well_formed: 100.0
error_outputs: 0
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 0
command: aider --model openrouter/anthropic/claude-3.5-sonnet --edit-format whole
date: 2024-09-05
versions: 0.55.1.dev
seconds_per_case: 15.2
total_cost: 2.3502
- dirname: 2024-09-12-22-44-14--o1-preview-diff
test_cases: 133
model: o1-preview (diff)
edit_format: diff
commit_hash: 72f52bd
pass_rate_1: 56.4
pass_rate_2: 75.2
percent_cases_well_formed: 84.2
error_outputs: 27
num_malformed_responses: 27
num_with_malformed_responses: 21
user_asks: 8
lazy_comments: 0
syntax_errors: 7
indentation_errors: 3
exhausted_context_windows: 0
test_timeouts: 3
command: aider --model o1-preview
date: 2024-09-12
versions: 0.56.1.dev
seconds_per_case: 95.8
total_cost: 71.7927
- dirname: 2024-09-13-02-13-59--o1-preview-whole
test_cases: 133
model: o1-preview (whole)
edit_format: whole
commit_hash: 72f52bd-dirty
pass_rate_1: 58.6
pass_rate_2: 79.7
percent_cases_well_formed: 100.0
error_outputs: 0
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 2
lazy_comments: 0
syntax_errors: 1
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model o1-preview
date: 2024-09-13
versions: 0.56.1.dev
seconds_per_case: 47.4
total_cost: 38.0612