corrected 1022 benchmark results

This commit is contained in:
Paul Gauthier 2024-10-22 10:52:35 -07:00
parent 748fd0cf12
commit bd28d8f3fb

View file

@ -1589,25 +1589,25 @@
seconds_per_case: 64.9 seconds_per_case: 64.9
total_cost: 0.0000 total_cost: 0.0000
- dirname: 2024-10-22-16-18-20--sonnet-1022-diff - dirname: 2024-10-22-17-45-28--sonnet-1022-diff-fixed-model-settings
test_cases: 133 test_cases: 133
model: claude-3-5-sonnet-20241022 model: anthropic/claude-3-5-sonnet-20241022
edit_format: diff edit_format: diff
commit_hash: 46269f2-dirty commit_hash: 3b14eb9
pass_rate_1: 67.7 pass_rate_1: 69.2
pass_rate_2: 83.5 pass_rate_2: 84.2
percent_cases_well_formed: 98.5 percent_cases_well_formed: 99.2
error_outputs: 5 error_outputs: 1
num_malformed_responses: 5 num_malformed_responses: 1
num_with_malformed_responses: 2 num_with_malformed_responses: 1
user_asks: 2 user_asks: 0
lazy_comments: 1 lazy_comments: 1
syntax_errors: 0 syntax_errors: 0
indentation_errors: 0 indentation_errors: 0
exhausted_context_windows: 0 exhausted_context_windows: 0
test_timeouts: 1 test_timeouts: 0
command: aider --model anthropic/claude-3-5-sonnet-20241022 command: aider --model anthropic/claude-3-5-sonnet-20241022
date: 2024-10-22 date: 2024-10-22
versions: 0.59.2.dev versions: 0.59.2.dev
seconds_per_case: 17.7 seconds_per_case: 18.6
total_cost: 0.0000 total_cost: 0.0000