Improved prompting for both GPT-4 and GPT-4 Turbo:

- Uses 2-shot examples, which highlight the need to: - Use multiple SEARCH/REPLACE blocks to make changes spread across multiple locations in a file (rather than one "big" S/R with ... elides ... of unchanging chunks of code). - Include comments, docstrings, etc. in the S/R blocks. - Updated list of explicit "rules" for constructing S/R blocks. For `gpt-4-1106-preview` the new prompts reduced `num_errors` from 25-30 -> 12-14, indicating improved ability to generate proper SEARCH/REPLACE blocks. Benchmark results of 54%/65% are stable or perhaps slightly better than the previous prompts. For `gpt-4-0613` the new prompts produce a major improvement in benchmark results. Performance leaps from 47%/64% up to 51%/71%. This now makes `aider + gpt-4-0613` better at coding than `aider + gpt-4-1106-preview` after the 2nd try, and competitive after the 1st try. benchmarks/2023-11-14-19-34-51--preview-prompting-positivity: test-cases: 133 model: gpt-4-1106-preview edit_format: diff commit_hash: 9cbe114 num_error_outputs: 14 num_user_asks: 0 num_exhausted_context_windows 0 test_timeouts: 2 53.4% correct after try 0 64.7% correct after try 1 duration: 23.6 sec/test-case costs: $0.0495/test-case, $6.58 total, $6.58 projected benchmarks/2023-11-14-19-55-31--preview-prompt-against-0613 test-cases: 133 model: gpt-4-0613 edit_format: diff commit_hash: 9cbe114 num_error_outputs: 10 num_user_asks: 0 num_exhausted_context_windows 0 test_timeouts: 1 51.1% correct after try 0 70.7% correct after try 1 duration: 52.5 sec/test-case costs: $0.1229/test-case, $16.34 total, $16.34 projected
2025-06-01 02:05:00 +00:00 · 2023-11-14 13:11:31 -08:00 · 2023-11-14 13:11:31 -08:00 · 0f9053008a
commit 0f9053008a
parent f00c7922d5 4aba307ebd
5 changed files with 225 additions and 60 deletions
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@ -316,10 +316,6 @@ class Coder:
            dict(role="user", content=all_content),
            dict(role="assistant", content="Ok."),
        ]
        if self.abs_fnames:
            files_messages += [
                dict(role="system", content=self.fmt_system_reminder()),
            ]
        return files_messages
@ -415,21 +411,14 @@ class Coder:
        return self.send_new_user_message(inp)
-    def fmt_system_reminder(self):
+    def fmt_system_prompt(self, prompt):
        prompt = self.gpt_prompts.system_reminder
        prompt = prompt.format(fence=self.fence)
        return prompt
-    def send_new_user_message(self, inp):
+    def format_messages(self):
        self.choose_fence()
-
+        main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system)
-        self.cur_messages += [
+        main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder)
            dict(role="user", content=inp),
        ]
        main_sys = self.gpt_prompts.main_system
        # if self.main_model.max_context_tokens > 4 * 1024:
        main_sys += "\n" + self.fmt_system_reminder()
        messages = [
            dict(role="system", content=main_sys),
@ -438,8 +427,36 @@ class Coder:
        self.summarize_end()
        messages += self.done_messages
        messages += self.get_files_messages()
        reminder_message = [
            dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)),
        ]
        messages_tokens = self.main_model.token_count(messages)
        reminder_tokens = self.main_model.token_count(reminder_message)
        cur_tokens = self.main_model.token_count(self.cur_messages)
        if None not in (messages_tokens, reminder_tokens, cur_tokens):
            total_tokens = messages_tokens + reminder_tokens + cur_tokens
        else:
            # add the reminder anyway
            total_tokens = 0
        # Add the reminder prompt if we still have room to include it.
        if total_tokens < self.main_model.max_context_tokens:
            messages += reminder_message
        messages += self.cur_messages
        return messages
    def send_new_user_message(self, inp):
        self.cur_messages += [
            dict(role="user", content=inp),
        ]
        messages = self.format_messages()
        if self.verbose:
            utils.show_messages(messages, functions=self.functions)
--- a/aider/coders/editblock_prompts.py
+++ b/aider/coders/editblock_prompts.py
@ -7,56 +7,182 @@ class EditBlockPrompts(CoderPrompts):
    main_system = """Act as an expert software developer.
 Always use best practices when coding.
 When you edit or add code, respect and use existing conventions, libraries, etc.
 Always COMPLETELY IMPLEMENT the needed code.
 Take requests for changes to the supplied code.
 If the request is ambiguous, ask questions.
 Once you understand the request you MUST:
-1. List the files you need to modify. *NEVER* suggest changes to a *read-only* file. Instead, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
+1. List the files you need to modify. Only suggest changes to a *read-write* files. Before changing *read-only* files you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
-2. Think step-by-step and explain the needed changes.
+2. Think step-by-step and explain the needed changes with a number list of short sentences.
-3. Describe each change with a *SEARCH/REPLACE block* per the example below.
+3. Describe each change with a *SEARCH/REPLACE block* per the examples below.
 """
-    system_reminder = """You MUST use a *SEARCH/REPLACE block* to modify the source file:
+All changes to files must use this *SEARCH/REPLACE block* format.
 # Example conversation 1:
 ## USER: These are the *read-write* files:
 mathweb/flask/app.py
 {fence[0]}python
 from flask import Flask
 app = Flask(__name__)
@app.route('/div/<int:n>/<int:d>')
 def divide(n, d):
    if d == 0:
        return 'Error: Division by zero'
    else:
        result = n / d
        return str(result)
 def factorial(n):
    "compute factorial"
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)
@app.route('/factorial/<int:n>')
 def get_factorial(n):
    return str(factorial(n))
 if __name__ == '__main__':
    print("Starting...")
    app.run()
 {fence[1]}
 ## ASSISTANT: Ok.
 ## USER: Change get_factorial() to use math.factorial
 ## ASSISTANT:
 Do make this change we need to modify `mathweb/flask/app.py` to:
 1. Import the math package.
 2. Remove the existing factorial() function.
 3. Update get_factorial() to call math.factorial instead.
 Here are the *SEARCH/REPLACE* blocks:
 mathweb/flask/app.py
 {fence[0]}python
 some/dir/example.py
 <<<<<<< SEARCH
-    # Multiplication function
+from flask import Flask
    def multiply(a,b)
        "multiply 2 numbers"
        return a*b
 =======
-    # Addition function
+import math
-    def add(a,b):
+from flask import Flask
        "add 2 numbers"
        return a+b
 >>>>>>> REPLACE
 {fence[1]}
-The *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character.
+mathweb/flask/app.py
-The *SEARCH/REPLACE block* must be concise.
+{fence[0]}python
-Include just enough lines to uniquely specify the change.
+<<<<<<< SEARCH
-Don't include extra unchanging lines.
+def factorial(n):
    "compute factorial"
-Every *SEARCH/REPLACE block* must be fenced with {fence[0]} and {fence[1]}, with the correct code language.
+    if n == 0:
        return 1
    else:
        return n * factorial(n-1)
-Every *SEARCH/REPLACE block* must start with the full path!
+=======
-NEVER try to *SEARCH/REPLACE* any *read-only* files.
+>>>>>>> REPLACE
 {fence[1]}
 mathweb/flask/app.py
 {fence[0]}python
 <<<<<<< SEARCH
    return str(factorial(n))
 =======
    return str(math.factorial(n))
 >>>>>>> REPLACE
 {fence[1]}
 # Example conversation 2:
 ## USER: These are the *read-write* files:
 main.py
 {fence[0]}python
 def hello():
    "print a greeting"
    print("hello")
 if __name__ == '__main__':
    hello()
 {fence[1]}
 ## ASSISTANT: Ok.
 ## USER: Refactor hello() into its own file.
 ## ASSISTANT:
 Do make this change we need to modify `main.py` and make a new file `hello.py`:
 1. Make a new hello.py file with hello() in it.
 2. Remove hello() from main.py and replace it with an import.
 Here are the *SEARCH/REPLACE* blocks:
 hello.py
 {fence[0]}python
 <<<<<<< SEARCH
 =======
 def hello():
    "print a greeting"
    print("hello")
 >>>>>>> REPLACE
 {fence[1]}
 main.py
 {fence[0]}python
 <<<<<<< SEARCH
 def hello():
    "print a greeting"
    print("hello")
 =======
 from hello import hello
 >>>>>>> REPLACE
 {fence[1]}
 # Rules
 """
    system_reminder = """Every *SEARCH/REPLACE block* must use this format:
 1. The file path alone on a line, eg: main.py
 2. The opening fence and code language, eg: {fence[0]}python
 3. The start of search block: <<<<<<< SEARCH
 4. A contiguous chunk of lines to search for in the existing source code
 5. The dividing line: =======
 6. The lines to replace into the source code
 7. The end of the replace block: >>>>>>> REPLACE
 8. The closing fence: {fence[1]}
 Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
 Include *ALL* the code being searched and replaced!
 Only *SEARCH/REPLACE* files that are *read-write*.
 If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
 - A new file path, including dir name if needed
 - An empty `SEARCH` section
- The new file's contents in the `updated` section
+- The new file's contents in the `REPLACE` section
 """
    files_content_prefix = "These are the *read-write* files:\n"
    files_no_full_files = "I am not sharing any *read-write* files yet."
-    repo_content_prefix = """Below here are summaries of other files!
+    repo_content_prefix = """Below here are summaries of other files present in this git repository.
 Do not propose changes to these files, they are *read-only*.
-To make a file *read-write*, ask me to *add it to the chat*.
+To make a file *read-write*, ask the user to *add it to the chat*.
 """
--- a/aider/commands.py
+++ b/aider/commands.py
@ -1,4 +1,3 @@
 import json
 import re
 import subprocess
 import sys
@ -104,20 +103,27 @@ class Commands:
        res = []
        self.coder.choose_fence()
        # system messages
        main_sys = self.coder.fmt_system_prompt(self.coder.gpt_prompts.main_system)
        main_sys += "\n" + self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder)
        msgs = [
-            dict(role="system", content=self.coder.gpt_prompts.main_system),
+            dict(role="system", content=main_sys),
-            dict(role="system", content=self.coder.gpt_prompts.system_reminder),
+            dict(
                role="system",
                content=self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder),
            ),
        ]
-        tokens = len(self.tokenizer.encode(json.dumps(msgs)))
+
        tokens = self.coder.main_model.token_count(msgs)
        res.append((tokens, "system messages", ""))
        # chat history
        msgs = self.coder.done_messages + self.coder.cur_messages
        if msgs:
            msgs = [dict(role="dummy", content=msg) for msg in msgs]
-            msgs = json.dumps(msgs)
+            tokens = self.coder.main_model.token_count(msgs)
            tokens = len(self.tokenizer.encode(msgs))
            res.append((tokens, "chat history", "use /clear to clear"))
        # repo map
@ -125,7 +131,7 @@ class Commands:
        if self.coder.repo_map:
            repo_content = self.coder.repo_map.get_repo_map(self.coder.abs_fnames, other_files)
            if repo_content:
-                tokens = len(self.tokenizer.encode(repo_content))
+                tokens = self.coder.main_model.token_count(repo_content)
                res.append((tokens, "repository map", "use --map-tokens to resize"))
        # files
@ -134,7 +140,7 @@ class Commands:
            content = self.io.read_text(fname)
            # approximate
            content = f"{relative_fname}\n```\n" + content + "```\n"
-            tokens = len(self.tokenizer.encode(content))
+            tokens = self.coder.main_model.token_count(content)
            res.append((tokens, f"{relative_fname}", "use /drop to drop from chat"))
        self.io.tool_output("Approximate context window usage, in tokens:")
--- a/aider/models/model.py
+++ b/aider/models/model.py
@ -1,3 +1,5 @@
 import json
 import openai
@ -37,3 +39,14 @@ class Model:
    @staticmethod
    def commit_message_models():
        """Return `Model.create(...)` results for the two gpt-3.5-turbo variants.

        The list holds, in order, the objects created for "gpt-3.5-turbo" and
        "gpt-3.5-turbo-16k" (presumably the candidate models for writing
        commit messages -- confirm against callers).
        """
        model_names = ("gpt-3.5-turbo", "gpt-3.5-turbo-16k")
        return [Model.create(model_name) for model_name in model_names]
    def token_count(self, messages):
        """Count the tokens in `messages` using this model's tokenizer.

        Args:
            messages: Either a string, which is encoded as-is, or any other
                JSON-serializable object (e.g. a list of chat message dicts),
                which is serialized with `json.dumps` before encoding.

        Returns:
            The length of the sequence returned by `self.tokenizer.encode`,
            or `None` when `self.tokenizer` is falsy (no tokenizer available).
        """
        if not self.tokenizer:
            # Without a tokenizer there is nothing to count with; signal
            # "unknown" explicitly rather than guessing a number.
            return None

        # isinstance (not an exact type comparison) so that str subclasses are
        # encoded as plain text instead of being JSON-quoted first.
        text = messages if isinstance(messages, str) else json.dumps(messages)
        return len(self.tokenizer.encode(text))
--- a/tests/test_main.py
+++ b/tests/test_main.py
@ -113,20 +113,23 @@ class TestMain(TestCase):
        self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
    def test_check_gitignore(self):
-        make_repo()
+        with tempfile.NamedTemporaryFile() as temp_gitconfig:
-        io = InputOutput(pretty=False, yes=True)
+            os.environ['GIT_CONFIG_GLOBAL'] = temp_gitconfig.name
-        cwd = Path.cwd()
+            make_repo()
-        gitignore = cwd / ".gitignore"
+            io = InputOutput(pretty=False, yes=True)
            cwd = Path.cwd()
            gitignore = cwd / ".gitignore"
-        self.assertFalse(gitignore.exists())
+            self.assertFalse(gitignore.exists())
-        check_gitignore(cwd, io)
+            check_gitignore(cwd, io)
-        self.assertTrue(gitignore.exists())
+            self.assertTrue(gitignore.exists())
-        self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
+            self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
-        gitignore.write_text("one\ntwo\n")
+            gitignore.write_text("one\ntwo\n")
-        check_gitignore(cwd, io)
+            check_gitignore(cwd, io)
-        self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
+            self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
            del os.environ['GIT_CONFIG_GLOBAL']
    def test_main_git_ignore(self):
        cwd = Path().cwd()