diff --git a/aider/website/_data/edit_leaderboard.yml b/aider/website/_data/edit_leaderboard.yml index 61f9f62cc..4b538e798 100644 --- a/aider/website/_data/edit_leaderboard.yml +++ b/aider/website/_data/edit_leaderboard.yml @@ -1201,26 +1201,26 @@ versions: 0.56.0 seconds_per_case: 9.3 total_cost: 0.0000 - -- dirname: 2024-09-20-20-07-27--qwen-2.5-72b-instruct + +- dirname: 2024-09-20-20-20-19--qwen-2.5-72b-instruct-diff test_cases: 133 model: qwen-2.5-72b-instruct (bf16) - edit_format: whole - commit_hash: d7051ce - pass_rate_1: 52.6 - pass_rate_2: 64.7 - percent_cases_well_formed: 100.0 - error_outputs: 0 - num_malformed_responses: 0 - num_with_malformed_responses: 0 - user_asks: 2 + edit_format: diff + commit_hash: 5139594 + pass_rate_1: 53.4 + pass_rate_2: 65.4 + percent_cases_well_formed: 96.2 + error_outputs: 9 + num_malformed_responses: 9 + num_with_malformed_responses: 5 + user_asks: 3 lazy_comments: 0 - syntax_errors: 0 - indentation_errors: 0 + syntax_errors: 2 + indentation_errors: 1 exhausted_context_windows: 0 - test_timeouts: 0 + test_timeouts: 3 command: aider --model openrouter/qwen/qwen-2.5-72b-instruct date: 2024-09-20 versions: 0.56.1.dev - seconds_per_case: 53.9 + seconds_per_case: 39.8 total_cost: 0.0000 \ No newline at end of file diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 3d559b9be..360e0705c 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -549,7 +549,7 @@ def run_test_real( chat_history_file=history_fname, ) - main_model = models.Model(model_name) + main_model = models.Model(model_name, weak_model=model_name) edit_format = edit_format or main_model.edit_format dump(main_model)