diff --git a/aider/website/_data/o1_results.yml b/aider/website/_data/o1_results.yml index 728d6965e..96d2ac1d6 100644 --- a/aider/website/_data/o1_results.yml +++ b/aider/website/_data/o1_results.yml @@ -1,6 +1,6 @@ - dirname: 2024-07-18-18-57-46--gpt-4o-mini-whole test_cases: 133 - model: gpt-4o-mini + model: gpt-4o-mini (whole) edit_format: whole commit_hash: d31eef3-dirty pass_rate_1: 40.6 @@ -24,7 +24,7 @@ - dirname: 2024-07-04-14-32-08--claude-3.5-sonnet-diff-continue test_cases: 133 - model: claude-3.5-sonnet + model: claude-3.5-sonnet (diff) edit_format: diff commit_hash: 35f21b5 pass_rate_1: 57.1 @@ -48,7 +48,7 @@ - dirname: 2024-08-06-18-28-39--gpt-4o-2024-08-06-diff-again test_cases: 133 - model: gpt-4o-2024-08-06 + model: gpt-4o-2024-08-06 (diff) edit_format: diff commit_hash: ed9ed89 pass_rate_1: 57.1 @@ -72,7 +72,7 @@ - dirname: 2024-09-12-19-57-35--o1-mini-whole test_cases: 133 - model: o1-mini + model: o1-mini (whole) edit_format: whole commit_hash: 36fa773-dirty, 291b456 pass_rate_1: 49.6 diff --git a/aider/website/_posts/2024-09-12-o1.md b/aider/website/_posts/2024-09-12-o1.md index 4e0780184..e9d087a91 100644 --- a/aider/website/_posts/2024-09-12-o1.md +++ b/aider/website/_posts/2024-09-12-o1.md @@ -9,9 +9,9 @@ nav_exclude: true # Benchmark results for OpenAI o1-mini -OpenAI o1-mini is priced similarly to GPT-4o and Claude 3.5 Sonnet. -o1-mini scored below those models -when using the simple "whole" editing format. +OpenAI o1-mini is priced similarly to GPT-4o and Claude 3.5 Sonnet, +but scored below those models +when using the "whole" editing format. It was close enough to GPT-4o to be within the margin of error. The o1-mini model had trouble following the very simple whole editing format. @@ -21,8 +21,8 @@ the response format. Note that o1-mini's "whole" score is compared against GPT-4o and Sonnet "diff" results. -Using diff is more challenging for GPT-4o and Sonnet, -but it allows them to return search/replace blocks to +Using diff is more challenging, +but allows the model to return search/replace blocks to efficiently edit the source code. The whole format requires the o1-mini to return a fresh copy of the entire file, increasing costs and latency.