feat: Add new benchmark test case for qwen-2.5-72b-instruct-diff model

This commit is contained in:
Paul Gauthier 2024-09-20 13:27:58 -07:00 committed by Paul Gauthier (aider)
parent 5139594fa0
commit 2753ac6b62
2 changed files with 16 additions and 16 deletions

View file

@@ -1201,26 +1201,26 @@
versions: 0.56.0 versions: 0.56.0
seconds_per_case: 9.3 seconds_per_case: 9.3
total_cost: 0.0000 total_cost: 0.0000
- dirname: 2024-09-20-20-07-27--qwen-2.5-72b-instruct - dirname: 2024-09-20-20-20-19--qwen-2.5-72b-instruct-diff
test_cases: 133 test_cases: 133
model: qwen-2.5-72b-instruct (bf16) model: qwen-2.5-72b-instruct (bf16)
edit_format: whole edit_format: diff
commit_hash: d7051ce commit_hash: 5139594
pass_rate_1: 52.6 pass_rate_1: 53.4
pass_rate_2: 64.7 pass_rate_2: 65.4
percent_cases_well_formed: 100.0 percent_cases_well_formed: 96.2
error_outputs: 0 error_outputs: 9
num_malformed_responses: 0 num_malformed_responses: 9
num_with_malformed_responses: 0 num_with_malformed_responses: 5
user_asks: 2 user_asks: 3
lazy_comments: 0 lazy_comments: 0
syntax_errors: 0 syntax_errors: 2
indentation_errors: 0 indentation_errors: 1
exhausted_context_windows: 0 exhausted_context_windows: 0
test_timeouts: 0 test_timeouts: 3
command: aider --model openrouter/qwen/qwen-2.5-72b-instruct command: aider --model openrouter/qwen/qwen-2.5-72b-instruct
date: 2024-09-20 date: 2024-09-20
versions: 0.56.1.dev versions: 0.56.1.dev
seconds_per_case: 53.9 seconds_per_case: 39.8
total_cost: 0.0000 total_cost: 0.0000

View file

@@ -549,7 +549,7 @@ def run_test_real(
chat_history_file=history_fname, chat_history_file=history_fname,
) )
main_model = models.Model(model_name) main_model = models.Model(model_name, weak_model=model_name)
edit_format = edit_format or main_model.edit_format edit_format = edit_format or main_model.edit_format
dump(main_model) dump(main_model)