This commit is contained in:
Paul Gauthier 2024-08-26 20:53:38 -07:00
parent 8da47b9664
commit 4b82277ef7
9 changed files with 477 additions and 14 deletions

View file

@ -0,0 +1,459 @@
- dirname: 2024-06-20-15-16-41--claude-3.5-sonnet-diff
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 068609e-dirty
pass_rate_1: 57.9
pass_rate_2: 74.4
percent_cases_well_formed: 97.0
error_outputs: 48
num_malformed_responses: 11
num_with_malformed_responses: 4
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-20
versions: 0.38.1-dev
seconds_per_case: 21.6
total_cost: 0.0000
- dirname: 2024-06-24-12-48-43--claude-3.5-sonnet-udiff
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: udiff
commit_hash: 7be08c7
pass_rate_1: 62.4
pass_rate_2: 74.4
percent_cases_well_formed: 100.0
error_outputs: 10
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 10
lazy_comments: 0
syntax_errors: 1
indentation_errors: 2
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-24
versions: 0.39.1-dev
seconds_per_case: 14.3
total_cost: 0.0000
- dirname: 2024-06-24-17-44-31--claude-3.5-sonnet-diff-less-chatty
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 0d484e5
pass_rate_1: 57.9
pass_rate_2: 74.4
percent_cases_well_formed: 99.2
error_outputs: 14
num_malformed_responses: 3
num_with_malformed_responses: 1
user_asks: 2
lazy_comments: 0
syntax_errors: 1
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 4
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-24
versions: 0.39.1-dev
seconds_per_case: 16.0
total_cost: 0.0000
- dirname: 2024-06-24-17-50-46--claude-3.5-sonnet-diff-less-chatty2
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 3015495
pass_rate_1: 59.4
pass_rate_2: 76.7
percent_cases_well_formed: 99.2
error_outputs: 5
num_malformed_responses: 1
num_with_malformed_responses: 1
user_asks: 1
lazy_comments: 0
syntax_errors: 0
indentation_errors: 1
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-24
versions: 0.39.1-dev
seconds_per_case: 15.7
total_cost: 0.0000
- dirname: 2024-06-24-17-56-40--claude-3.5-sonnet-diff-less-chatty-sys-examples
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 3015495-dirty
pass_rate_1: 58.6
pass_rate_2: 75.9
percent_cases_well_formed: 100.0
error_outputs: 2
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 3
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-24
versions: 0.39.1-dev
seconds_per_case: 15.9
total_cost: 0.0000
- dirname: 2024-07-04-14-32-08--claude-3.5-sonnet-diff-continue
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 35f21b5
pass_rate_1: 57.1
pass_rate_2: 77.4
percent_cases_well_formed: 99.2
error_outputs: 23
num_malformed_responses: 4
num_with_malformed_responses: 1
user_asks: 2
lazy_comments: 0
syntax_errors: 1
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-07-04
versions: 0.42.1-dev
seconds_per_case: 17.6
total_cost: 3.6346
- dirname: 2024-07-06-19-39-59--claude-3.5-sonnet-diff-platform
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: e47c2a9-dirty
pass_rate_1: 57.9
pass_rate_2: 78.2
percent_cases_well_formed: 100.0
error_outputs: 0
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-07-06
versions: 0.42.1-dev
seconds_per_case: 14.6
total_cost: 3.5616
- dirname: 2024-07-24-17-11-07--claude-3.5-sonnet-diff-july24
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 859a13e
pass_rate_1: 59.4
pass_rate_2: 78.2
percent_cases_well_formed: 99.2
error_outputs: 6
num_malformed_responses: 1
num_with_malformed_responses: 1
user_asks: 1
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-07-24
versions: 0.45.2-dev
seconds_per_case: 16.9
total_cost: 3.4981
- dirname: 2024-07-28-20-23-42--claude-3.5-sonnet-diff-no-reminder
test_cases: 94
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: e799e89-dirty
pass_rate_1: 59.6
pass_rate_2: 83.0
percent_cases_well_formed: 98.9
error_outputs: 12
num_malformed_responses: 2
num_with_malformed_responses: 1
user_asks: 2
lazy_comments: 0
syntax_errors: 1
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-07-28
versions: 0.45.2-dev
seconds_per_case: 15.7
total_cost: 2.4340
- dirname: 2024-08-14-00-46-09--claude-3.5-sonnet-diff-no-ipynb-again
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 139f799
pass_rate_1: 57.9
pass_rate_2: 75.9
percent_cases_well_formed: 98.5
error_outputs: 22
num_malformed_responses: 5
num_with_malformed_responses: 2
user_asks: 249
lazy_comments: 0
syntax_errors: 1
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-08-14
versions: 0.50.1-dev
seconds_per_case: 18.0
total_cost: 3.7058
- dirname: 2024-06-21-00-07-01--claude-3.5-sonnet-do-over
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: fb26174-dirty
pass_rate_1: 59.4
pass_rate_2: 80.5
percent_cases_well_formed: 99.2
error_outputs: 20
num_malformed_responses: 4
num_with_malformed_responses: 1
user_asks: 1
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-21
versions: 0.39.1-dev
seconds_per_case: 18.3
total_cost: 0.0000
- dirname: 2024-06-21-00-18-25--claude-3.5-sonnet-do-over2
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: fb26174-dirty
pass_rate_1: 58.6
pass_rate_2: 77.4
percent_cases_well_formed: 98.5
error_outputs: 22
num_malformed_responses: 4
num_with_malformed_responses: 2
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 0
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-21
versions: 0.39.1-dev
seconds_per_case: 17.3
total_cost: 0.0000
- dirname: 2024-06-24-00-09-40--claude-3.5-sonnet-chatty
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: b44c246-dirty
pass_rate_1: 59.4
pass_rate_2: 75.2
percent_cases_well_formed: 98.5
error_outputs: 21
num_malformed_responses: 5
num_with_malformed_responses: 2
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-24
versions: 0.39.1-dev
seconds_per_case: 15.7
total_cost: 0.0000
- dirname: 2024-06-24-00-33-35--claude-3.5-sonnet-chatty-do-over
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: bc1dfa3
pass_rate_1: 58.6
pass_rate_2: 76.7
percent_cases_well_formed: 97.7
error_outputs: 26
num_malformed_responses: 6
num_with_malformed_responses: 3
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-06-24
versions: 0.39.1-dev
seconds_per_case: 16.4
total_cost: 0.0000
- dirname: 2024-08-18-19-57-30--claude-3.5-sonnet-aug18
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 5099a5c
pass_rate_1: 54.9
pass_rate_2: 78.9
percent_cases_well_formed: 97.7
error_outputs: 47
num_malformed_responses: 11
num_with_malformed_responses: 3
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-08-18
versions: 0.50.2-dev
seconds_per_case: 22.3
total_cost: 3.9008
- dirname: 2024-08-18-20-23-50--claude-3.5-sonnet-aug18-cache-prompts
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 53db8cf-dirty
pass_rate_1: 56.4
pass_rate_2: 78.9
percent_cases_well_formed: 97.7
error_outputs: 16
num_malformed_responses: 4
num_with_malformed_responses: 3
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 3
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-08-18
versions: 0.50.2-dev
seconds_per_case: 21.1
total_cost: 3.6918
- dirname: 2024-08-18-23-11-04--claude-3.5-sonnet-aug18-cache-prompts-cold
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 53db8cf-dirty
pass_rate_1: 56.4
pass_rate_2: 78.2
percent_cases_well_formed: 97.0
error_outputs: 30
num_malformed_responses: 7
num_with_malformed_responses: 4
user_asks: 1
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-08-18
versions: 0.50.2-dev
seconds_per_case: 21.8
total_cost: 3.7858
- dirname: 2024-08-21-01-07-39--sonnet-diff-cache
test_cases: 133
model: claude-3-5-sonnet-20240620
edit_format: diff
commit_hash: e12157b-dirty
pass_rate_1: 57.1
pass_rate_2: 82.0
percent_cases_well_formed: 98.5
error_outputs: 12
num_malformed_responses: 2
num_with_malformed_responses: 2
user_asks: 0
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 2
command: aider --model claude-3-5-sonnet-20240620
date: 2024-08-21
versions: 0.51.2-dev
seconds_per_case: 14.5
total_cost: 3.1795
- dirname: 2024-08-21-00-50-49--shell-cmds-sonnet-user-remind
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 919ea05
pass_rate_1: 63.2
pass_rate_2: 79.7
percent_cases_well_formed: 98.5
error_outputs: 18
num_malformed_responses: 4
num_with_malformed_responses: 2
user_asks: 26
lazy_comments: 0
syntax_errors: 0
indentation_errors: 2
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-08-21
versions: 0.51.2-dev
seconds_per_case: 16.3
total_cost: 3.4738
- dirname: 2024-08-21-00-55-30--shell-cmds-sonnet-no-user-remind
test_cases: 133
model: openrouter/anthropic/claude-3.5-sonnet
edit_format: diff
commit_hash: 5c7707a
pass_rate_1: 63.9
pass_rate_2: 80.5
percent_cases_well_formed: 97.7
error_outputs: 51
num_malformed_responses: 12
num_with_malformed_responses: 3
user_asks: 24
lazy_comments: 0
syntax_errors: 0
indentation_errors: 1
exhausted_context_windows: 0
test_timeouts: 1
command: aider --model openrouter/anthropic/claude-3.5-sonnet
date: 2024-08-21
versions: 0.51.2-dev
seconds_per_case: 17.7
total_cost: 3.8990