This commit is contained in:
Paul Gauthier 2024-11-24 14:52:01 -08:00
parent 0c59d3234e
commit 86619052ca
2 changed files with 45 additions and 40 deletions

View file

@ -114,29 +114,29 @@
seconds_per_case: 98.4
total_cost: 0.0000
- dirname: 2024-11-20-15-17-37--qwen25-32b-or-diff
- dirname: 2024-11-24-22-18-18--or-all-or-fixed-blank-messages2
test_cases: 133
model: "OpenRouter: multiple"
edit_format: diff
commit_hash: e917424
pass_rate_1: 49.6
pass_rate_2: 65.4
percent_cases_well_formed: 84.2
error_outputs: 43
num_malformed_responses: 31
num_with_malformed_responses: 21
user_asks: 43
commit_hash: 0c59d32
pass_rate_1: 57.1
pass_rate_2: 67.7
percent_cases_well_formed: 95.5
error_outputs: 56
num_malformed_responses: 10
num_with_malformed_responses: 6
user_asks: 14
lazy_comments: 0
syntax_errors: 2
indentation_errors: 2
exhausted_context_windows: 12
test_timeouts: 2
syntax_errors: 6
indentation_errors: 0
exhausted_context_windows: 3
test_timeouts: 1
command: aider --model openrouter/qwen/qwen-2.5-coder-32b-instruct
date: 2024-11-20
versions: 0.63.3.dev
seconds_per_case: 40.7
total_cost: 0.1497
date: 2024-11-24
versions: 0.64.2.dev
seconds_per_case: 21.2
total_cost: 0.1420
- dirname: 2024-11-23-21-08-53--ollama-qwen2.5-coder:32b-instruct-q4_K_M-8kctx
test_cases: 133
model: "Ollama: q4_K_M"
@ -252,29 +252,29 @@
seconds_per_case: 16.1
total_cost: 0.1391
- dirname: 2024-11-24-14-36-59--qwen25-32b-or-hyperbolic
- dirname: 2024-11-24-22-03-19--or-hyperbolic-or-fixed-blank-messages2
test_cases: 133
model: "Hyperbolic via OpenRouter: BF16"
edit_format: diff
commit_hash: c2f184f
pass_rate_1: 40.6
pass_rate_2: 46.6
percent_cases_well_formed: 83.5
error_outputs: 80
num_malformed_responses: 73
num_with_malformed_responses: 22
user_asks: 56
commit_hash: 0c59d32
pass_rate_1: 55.6
pass_rate_2: 68.4
percent_cases_well_formed: 89.5
error_outputs: 28
num_malformed_responses: 24
num_with_malformed_responses: 14
user_asks: 29
lazy_comments: 0
syntax_errors: 15
syntax_errors: 1
indentation_errors: 0
exhausted_context_windows: 5
test_timeouts: 0
exhausted_context_windows: 4
test_timeouts: 1
command: aider --model openrouter/qwen/qwen-2.5-coder-32b-instruct
date: 2024-11-24
versions: 0.64.2.dev
seconds_per_case: 110.0
total_cost: 0.1763
seconds_per_case: 41.5
total_cost: 0.1402
- dirname: 2024-11-24-15-00-50--qwen25-32b-or-deepinfra
test_cases: 133
model: "Deepinfra via OpenRouter: BF16"