diff --git a/aider/website/_data/architect.yml b/aider/website/_data/architect.yml index b4e0418ac..62d4cb7cf 100644 --- a/aider/website/_data/architect.yml +++ b/aider/website/_data/architect.yml @@ -465,3 +465,28 @@ versions: 0.44.1-dev seconds_per_case: 7.8 total_cost: 0.0916 + +- dirname: 2024-09-29-22-35-36--architect-o1preview-o1mini-whole + test_cases: 133 + model: o1-preview + edit_format: architect + commit_hash: 53ca83b + editor_model: o1-mini + editor_edit_format: whole + pass_rate_1: 65.4 + pass_rate_2: 85.0 + percent_cases_well_formed: 100.0 + error_outputs: 0 + num_malformed_responses: 0 + num_with_malformed_responses: 0 + user_asks: 179 + lazy_comments: 4 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 1 + command: aider --model o1-preview + date: 2024-09-29 + versions: 0.58.1.dev + seconds_per_case: 39.7 + total_cost: 36.2078 \ No newline at end of file diff --git a/aider/website/_posts/2024-09-26-architect.md b/aider/website/_posts/2024-09-26-architect.md index 23cb444b0..6e78a3f2e 100644 --- a/aider/website/_posts/2024-09-26-architect.md +++ b/aider/website/_posts/2024-09-26-architect.md @@ -19,7 +19,10 @@ Aider now has experimental support for using two models to complete each coding Splitting up "code reasoning" and "code editing" in this manner has produced SOTA results on [aider's code editing benchmark](/docs/benchmarks.html#the-benchmark). -It also significantly improved the benchmark scores of many +Using o1-preview as the Architect with either DeepSeek or o1-mini as the +Editor produced the SOTA score of 85%. +Using the Architect/Editor approach +also significantly improved the benchmark scores of many models, compared to their previous "solo" baseline scores (striped bars).