add o4-mini (high)

This commit is contained in:
Paul Gauthier 2025-04-16 17:24:55 -07:00
parent bb1fa24971
commit 7f28d63c33
2 changed files with 27 additions and 11 deletions

View file

@ -1326,9 +1326,6 @@
system_prompt_prefix: "Formatting re-enabled. " system_prompt_prefix: "Formatting re-enabled. "
accepts_settings: ["reasoning_effort"] accepts_settings: ["reasoning_effort"]
examples_as_sys_msg: true examples_as_sys_msg: true
#extra_params:
# extra_body:
# reasoning_effort: high
- name: openrouter/openai/o4-mini - name: openrouter/openai/o4-mini
edit_format: diff edit_format: diff
@ -1340,9 +1337,6 @@
system_prompt_prefix: "Formatting re-enabled. " system_prompt_prefix: "Formatting re-enabled. "
accepts_settings: ["reasoning_effort"] accepts_settings: ["reasoning_effort"]
examples_as_sys_msg: true examples_as_sys_msg: true
#extra_params:
# extra_body:
# reasoning_effort: high
- name: azure/o4-mini - name: azure/o4-mini
edit_format: diff edit_format: diff
@ -1354,9 +1348,6 @@
system_prompt_prefix: "Formatting re-enabled. " system_prompt_prefix: "Formatting re-enabled. "
accepts_settings: ["reasoning_effort"] accepts_settings: ["reasoning_effort"]
examples_as_sys_msg: true examples_as_sys_msg: true
#extra_params:
# extra_body:
# reasoning_effort: high
- name: o4-mini - name: o4-mini
edit_format: diff edit_format: diff

View file

@ -1095,7 +1095,7 @@
- dirname: 2025-04-16-21-20-55--o3-high-diff-temp0-exsys - dirname: 2025-04-16-21-20-55--o3-high-diff-temp0-exsys
test_cases: 225 test_cases: 225
model: o3 model: o3 (high)
edit_format: diff edit_format: diff
commit_hash: 24805ff-dirty commit_hash: 24805ff-dirty
pass_rate_1: 36.9 pass_rate_1: 36.9
@ -1118,4 +1118,29 @@
versions: 0.82.1.dev versions: 0.82.1.dev
seconds_per_case: 113.8 seconds_per_case: 113.8
total_cost: 111.0325 total_cost: 111.0325
- dirname: 2025-04-16-22-01-58--o4-mini-high-diff-exsys
test_cases: 225
model: o4-mini (high)
edit_format: diff
commit_hash: b66901f-dirty
pass_rate_1: 19.6
pass_rate_2: 72.0
pass_num_1: 44
pass_num_2: 162
percent_cases_well_formed: 90.7
error_outputs: 26
num_malformed_responses: 24
num_with_malformed_responses: 21
user_asks: 66
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 1
test_timeouts: 2
total_tests: 225
command: aider --model o4-mini
date: 2025-04-16
versions: 0.82.1.dev
seconds_per_case: 176.5
total_cost: 19.6399