chore: Update polyglot leaderboard test results and metadata

This commit is contained in:
Paul Gauthier 2025-01-17 13:37:02 -08:00 committed by Paul Gauthier (aider)
parent 5b6c186125
commit a777f336e1

View file

@ -24,32 +24,32 @@
seconds_per_case: 17.3
total_cost: 0.3236
- dirname: 2024-12-21-18-44-28--polyglot-sonnet
- dirname: 2025-01-17-19-44-33--sonnet-baseline-jan-17
test_cases: 225
model: claude-3-5-sonnet-20241022
edit_format: diff
commit_hash: a755079-dirty
pass_rate_1: 18.7
pass_rate_2: 45.3
pass_num_1: 42
pass_num_2: 102
percent_cases_well_formed: 100.0
error_outputs: 1
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 14
commit_hash: 6451d59
pass_rate_1: 22.2
pass_rate_2: 51.6
pass_num_1: 50
pass_num_2: 116
percent_cases_well_formed: 99.6
error_outputs: 2
num_malformed_responses: 1
num_with_malformed_responses: 1
user_asks: 11
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 1
test_timeouts: 12
test_timeouts: 8
total_tests: 225
command: aider --model claude-3-5-sonnet-20241022
date: 2024-12-21
versions: 0.69.2.dev
seconds_per_case: 30.8
total_cost: 13.4847
date: 2025-01-17
versions: 0.71.2.dev
seconds_per_case: 21.4
total_cost: 14.4063
- dirname: 2024-12-30-20-57-12--gpt-4o-2024-11-20-ex-as-sys
test_cases: 225
model: gpt-4o-2024-11-20