mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-04 11:45:00 +00:00
feat: Add new benchmark test case for qwen-2.5-72b-instruct-diff model
This commit is contained in:
parent
5139594fa0
commit
2753ac6b62
2 changed files with 16 additions and 16 deletions
|
@ -1201,26 +1201,26 @@
|
||||||
versions: 0.56.0
|
versions: 0.56.0
|
||||||
seconds_per_case: 9.3
|
seconds_per_case: 9.3
|
||||||
total_cost: 0.0000
|
total_cost: 0.0000
|
||||||
|
|
||||||
- dirname: 2024-09-20-20-07-27--qwen-2.5-72b-instruct
|
- dirname: 2024-09-20-20-20-19--qwen-2.5-72b-instruct-diff
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: qwen-2.5-72b-instruct (bf16)
|
model: qwen-2.5-72b-instruct (bf16)
|
||||||
edit_format: whole
|
edit_format: diff
|
||||||
commit_hash: d7051ce
|
commit_hash: 5139594
|
||||||
pass_rate_1: 52.6
|
pass_rate_1: 53.4
|
||||||
pass_rate_2: 64.7
|
pass_rate_2: 65.4
|
||||||
percent_cases_well_formed: 100.0
|
percent_cases_well_formed: 96.2
|
||||||
error_outputs: 0
|
error_outputs: 9
|
||||||
num_malformed_responses: 0
|
num_malformed_responses: 9
|
||||||
num_with_malformed_responses: 0
|
num_with_malformed_responses: 5
|
||||||
user_asks: 2
|
user_asks: 3
|
||||||
lazy_comments: 0
|
lazy_comments: 0
|
||||||
syntax_errors: 0
|
syntax_errors: 2
|
||||||
indentation_errors: 0
|
indentation_errors: 1
|
||||||
exhausted_context_windows: 0
|
exhausted_context_windows: 0
|
||||||
test_timeouts: 0
|
test_timeouts: 3
|
||||||
command: aider --model openrouter/qwen/qwen-2.5-72b-instruct
|
command: aider --model openrouter/qwen/qwen-2.5-72b-instruct
|
||||||
date: 2024-09-20
|
date: 2024-09-20
|
||||||
versions: 0.56.1.dev
|
versions: 0.56.1.dev
|
||||||
seconds_per_case: 53.9
|
seconds_per_case: 39.8
|
||||||
total_cost: 0.0000
|
total_cost: 0.0000
|
|
@ -549,7 +549,7 @@ def run_test_real(
|
||||||
chat_history_file=history_fname,
|
chat_history_file=history_fname,
|
||||||
)
|
)
|
||||||
|
|
||||||
main_model = models.Model(model_name)
|
main_model = models.Model(model_name, weak_model=model_name)
|
||||||
edit_format = edit_format or main_model.edit_format
|
edit_format = edit_format or main_model.edit_format
|
||||||
|
|
||||||
dump(main_model)
|
dump(main_model)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue