diff --git a/aider/models.py b/aider/models.py index f703c140a..9a1dccc9f 100644 --- a/aider/models.py +++ b/aider/models.py @@ -764,6 +764,15 @@ MODEL_SETTINGS = [ use_temperature=False, streaming=False, ), + ModelSettings( + "openrouter/openai/o1", + "diff", + weak_model_name="openrouter/openai/gpt-4o-mini", + editor_model_name="openrouter/openai/gpt-4o", + editor_edit_format="editor-diff", + use_repo_map=True, + use_temperature=False, + ), ModelSettings( "openrouter/qwen/qwen-2.5-coder-32b-instruct", "diff", diff --git a/aider/watch_prompts.py b/aider/watch_prompts.py index 898e3bee2..eca831ae2 100644 --- a/aider/watch_prompts.py +++ b/aider/watch_prompts.py @@ -1,11 +1,11 @@ watch_code_prompt = """ Find the "AI" comments below (marked with █) in the code files I've shared with you. -They contain your instructions. -Make the requested changes. -Be sure to remove all these "AI" comments from the code! +I've written your instructions there. +Follow my instructions as given in the AI comments and make the requested changes. +Also, be sure to remove all the "AI" comments from the code too. """ watch_ask_prompt = """/ask Find the "AI" comments below (marked with █) in the code files I've shared with you. -They contain your questions you need to answer and other instructions. +They contain my questions that I need you to answer and other instructions for you. """ diff --git a/aider/website/_data/edit_leaderboard.yml b/aider/website/_data/edit_leaderboard.yml index 0d842d32e..a2dd8ac9a 100644 --- a/aider/website/_data/edit_leaderboard.yml +++ b/aider/website/_data/edit_leaderboard.yml @@ -2180,4 +2180,27 @@ date: 2024-12-11 versions: 0.68.1.dev seconds_per_case: 7.3 + total_cost: 0.0000 + +- dirname: 2024-12-18-01-50-08--o1 + test_cases: 133 + model: openrouter/openai/o1 + edit_format: diff + commit_hash: 074c636-dirty + pass_rate_1: 65.4 + pass_rate_2: 84.2 + percent_cases_well_formed: 99.2 + error_outputs: 1 + num_malformed_responses: 1 + num_with_malformed_responses: 1 + user_asks: 0 + lazy_comments: 0 + syntax_errors: 0 + indentation_errors: 0 + exhausted_context_windows: 0 + test_timeouts: 2 + command: aider --model openrouter/openai/o1 + date: 2024-12-18 + versions: 0.69.2.dev + seconds_per_case: 29.9 total_cost: 0.0000 \ No newline at end of file diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile index 8f0b44f08..f9c04413f 100644 --- a/benchmark/Dockerfile +++ b/benchmark/Dockerfile @@ -15,6 +15,17 @@ RUN apt-get update && apt-get install -y \ # Make python3.11 the default python3 RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 +# Install Go +RUN curl -OL https://golang.org/dl/go1.21.5.linux-amd64.tar.gz && \ + tar -C /usr/local -xzf go1.21.5.linux-amd64.tar.gz && \ + rm go1.21.5.linux-amd64.tar.gz +ENV PATH="/usr/local/go/bin:${PATH}" + +# Install Rust +ADD https://sh.rustup.rs /tmp/rustup.sh +RUN chmod +x /tmp/rustup.sh && /tmp/rustup.sh -y && rm /tmp/rustup.sh +ENV PATH="/root/.cargo/bin:${PATH}" + # Install Node.js and dependencies RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ apt-get install -y nodejs && \ @@ -33,17 +44,6 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ core-js@3.37.1 \ eslint@8.49.0 -# Install Go -RUN curl -OL https://golang.org/dl/go1.21.5.linux-amd64.tar.gz && \ - tar -C /usr/local -xzf go1.21.5.linux-amd64.tar.gz && \ - rm go1.21.5.linux-amd64.tar.gz -ENV PATH="/usr/local/go/bin:${PATH}" - -# Install Rust -ADD https://sh.rustup.rs /tmp/rustup.sh -RUN chmod +x /tmp/rustup.sh && /tmp/rustup.sh -y && rm /tmp/rustup.sh -ENV PATH="/root/.cargo/bin:${PATH}" - COPY . /aider RUN pip3 install --no-cache-dir --upgrade pip uv RUN uv pip install --system --no-cache-dir -e /aider[dev] diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index cd9ceafe4..9f1ff2b4c 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -416,6 +416,7 @@ def load_results(dirname): results = json.loads(fname.read_text()) all_results.append(results) except json.JSONDecodeError: + print("json.JSONDecodeError", fname) continue return all_results @@ -655,7 +656,9 @@ def run_test_real( config = json.loads(f.read()) # Get solution and test files from config - solution_files = config.get("files", {}).get("solution", []) + solution_files = set(config.get("files", {}).get("solution", [])) + solution_files.discard("Cargo.toml") + test_files = config.get("files", {}).get("test", []) # Copy all solution files @@ -743,7 +746,10 @@ def run_test_real( # auto_lint=False, # disabled for code-in-json experiments cache_prompts=True, suggest_shell_commands=False, + ignore_mentions=set(test_files), ) + dump(coder.ignore_mentions) + coder.max_apply_update_errors = max_apply_update_errors coder.show_announcements() @@ -851,7 +857,7 @@ def run_unit_tests(original_dname, testdir, history_fname, test_files): # Map of file extensions to test commands TEST_COMMANDS = { ".py": ["pytest"], - ".rs": ["cargo", "test", "--", "--include-ignored"], + ".rs": ["cargo", "test", "--offline", "--", "--include-ignored"], ".cs": ["dotnet", "test"], ".go": ["go", "test", "./..."], ".js": ["/aider/benchmark/npm-test.sh"], diff --git a/benchmark/prompts.py b/benchmark/prompts.py index 13511d023..4fb3c387e 100644 --- a/benchmark/prompts.py +++ b/benchmark/prompts.py @@ -3,7 +3,7 @@ instructions_addendum = """ Use the above instructions to modify the supplied files: {file_list} Don't change the names of existing functions or classes, as they may be referenced from other code like unit tests, etc. -Only use standard python libraries, don't suggest installing any packages. +Only use standard libraries, don't suggest installing any packages. """ # noqa: E501 @@ -11,6 +11,6 @@ test_failures = """ #### See the testing errors above. -The tests are correct. +The tests are correct, don't try and change them. Fix the code in {file_list} to resolve the errors. """