mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-31 01:35:00 +00:00
update o1-preview leaderboard to diff only
This commit is contained in:
parent
b27738d39a
commit
565c305aa6
2 changed files with 17 additions and 38 deletions
|
@ -6,6 +6,8 @@
|
|||
- Support for OpenAI o1 models:
|
||||
- `aider --model o1-mini`
|
||||
- `aider --model o1-preview`
|
||||
- o1-preview now works well with diff edit format.
|
||||
- o1-preview with diff now matches SOTA leaderboard result with whole edit format.
|
||||
- On Windows, `/run` correctly uses PowerShell or cmd.exe.
|
||||
- Support for new 08-2024 Cohere models, by @jalammar.
|
||||
- Can now recursively add directories with `/read-only`.
|
||||
|
|
|
@ -1133,52 +1133,29 @@
|
|||
seconds_per_case: 177.7
|
||||
total_cost: 11.1071
|
||||
|
||||
- dirname: 2024-09-12-22-44-14--o1-preview-diff
|
||||
- dirname: 2024-09-21-16-45-11--o1-preview-flex-sr-markers
|
||||
test_cases: 133
|
||||
model: o1-preview (diff)
|
||||
model: o1-preview
|
||||
edit_format: diff
|
||||
commit_hash: 72f52bd
|
||||
pass_rate_1: 56.4
|
||||
pass_rate_2: 75.2
|
||||
percent_cases_well_formed: 84.2
|
||||
error_outputs: 27
|
||||
num_malformed_responses: 27
|
||||
num_with_malformed_responses: 21
|
||||
user_asks: 8
|
||||
lazy_comments: 0
|
||||
syntax_errors: 7
|
||||
indentation_errors: 3
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 3
|
||||
command: aider --model o1-preview
|
||||
date: 2024-09-12
|
||||
versions: 0.56.1.dev
|
||||
seconds_per_case: 95.8
|
||||
total_cost: 71.7927
|
||||
|
||||
- dirname: 2024-09-13-02-13-59--o1-preview-whole
|
||||
test_cases: 133
|
||||
model: o1-preview (whole)
|
||||
edit_format: whole
|
||||
commit_hash: 72f52bd-dirty
|
||||
pass_rate_1: 58.6
|
||||
commit_hash: 5493654-dirty
|
||||
pass_rate_1: 57.9
|
||||
pass_rate_2: 79.7
|
||||
percent_cases_well_formed: 100.0
|
||||
error_outputs: 0
|
||||
num_malformed_responses: 0
|
||||
num_with_malformed_responses: 0
|
||||
user_asks: 2
|
||||
percent_cases_well_formed: 93.2
|
||||
error_outputs: 11
|
||||
num_malformed_responses: 11
|
||||
num_with_malformed_responses: 9
|
||||
user_asks: 3
|
||||
lazy_comments: 0
|
||||
syntax_errors: 1
|
||||
syntax_errors: 10
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
test_timeouts: 1
|
||||
command: aider --model o1-preview
|
||||
date: 2024-09-13
|
||||
date: 2024-09-21
|
||||
versions: 0.56.1.dev
|
||||
seconds_per_case: 47.4
|
||||
total_cost: 38.0612
|
||||
|
||||
seconds_per_case: 80.9
|
||||
total_cost: 63.9190
|
||||
|
||||
- dirname: 2024-09-19-16-58-29--qwen2.5-coder:7b-instruct-q8_0
|
||||
test_cases: 133
|
||||
model: qwen2.5-coder:7b-instruct-q8_0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue