Improved prompting for both GPT-4 and GPT-4 Turbo:

- Uses 2-shot examples, which highlight the need to:
  - Use multiple SEARCH/REPLACE blocks to make changes spread across multiple locations in a file (rather than one "big" S/R block that uses "..." to elide unchanging chunks of code).
  - Include comments, docstrings, etc in the S/R blocks.
- Updated list of explicit "rules" for constructing S/R blocks.

For `gpt-4-1106-preview` the new prompts reduced `num_errors` from 25-30 -> 12-14, indicating improved ability to generate proper SEARCH/REPLACE blocks. Benchmark results of 54%/65% are stable or perhaps slightly better than the previous prompts.

For `gpt-4-0613` the new prompts produce a major improvement in benchmark results. Performance leaps from 47%/64% up to 51%/71%. This now makes `aider + gpt-4-0613` better at coding than `aider + gpt-4-1106-preview` after the 2nd try, and competitive after the 1st try.

benchmarks/2023-11-14-19-34-51--preview-prompting-positivity:
  test-cases: 133
  model: gpt-4-1106-preview
  edit_format: diff
  commit_hash: 9cbe114
  num_error_outputs: 14
  num_user_asks: 0
  num_exhausted_context_windows: 0
  test_timeouts: 2
  53.4% correct after try 0
  64.7% correct after try 1
  duration: 23.6 sec/test-case
  costs: $0.0495/test-case, $6.58 total, $6.58 projected

benchmarks/2023-11-14-19-55-31--preview-prompt-against-0613:
  test-cases: 133
  model: gpt-4-0613
  edit_format: diff
  commit_hash: 9cbe114
  num_error_outputs: 10
  num_user_asks: 0
  num_exhausted_context_windows: 0
  test_timeouts: 1
  51.1% correct after try 0
  70.7% correct after try 1
  duration: 52.5 sec/test-case
  costs: $0.1229/test-case, $16.34 total, $16.34 projected
2025-05-30 17:24:59 +00:00 · 2023-11-14 13:11:31 -08:00 · 2023-11-14 13:11:31 -08:00 · 0f9053008a
commit 0f9053008a
parent f00c7922d5 4aba307ebd
5 changed files with 225 additions and 60 deletions
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@ -316,10 +316,6 @@ class Coder:
            dict(role="user", content=all_content),
            dict(role="assistant", content="Ok."),
        ]
-        if self.abs_fnames:
-            files_messages += [
-                dict(role="system", content=self.fmt_system_reminder()),
-            ]

        return files_messages

@ -415,21 +411,14 @@ class Coder:

        return self.send_new_user_message(inp)

-    def fmt_system_reminder(self):
-        prompt = self.gpt_prompts.system_reminder
+    def fmt_system_prompt(self, prompt):
        prompt = prompt.format(fence=self.fence)
        return prompt

-    def send_new_user_message(self, inp):
+    def format_messages(self):
        self.choose_fence()
-
-        self.cur_messages += [
-            dict(role="user", content=inp),
-        ]
-
-        main_sys = self.gpt_prompts.main_system
-        # if self.main_model.max_context_tokens > 4 * 1024:
-        main_sys += "\n" + self.fmt_system_reminder()
+        main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system)
+        main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder)

        messages = [
            dict(role="system", content=main_sys),
@ -438,8 +427,36 @@ class Coder:
        self.summarize_end()
        messages += self.done_messages
        messages += self.get_files_messages()
+
+        reminder_message = [
+            dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)),
+        ]
+
+        messages_tokens = self.main_model.token_count(messages)
+        reminder_tokens = self.main_model.token_count(reminder_message)
+        cur_tokens = self.main_model.token_count(self.cur_messages)
+
+        if None not in (messages_tokens, reminder_tokens, cur_tokens):
+            total_tokens = messages_tokens + reminder_tokens + cur_tokens
+        else:
+            # add the reminder anyway
+            total_tokens = 0
+
+        # Add the reminder prompt if we still have room to include it.
+        if total_tokens < self.main_model.max_context_tokens:
+            messages += reminder_message
+
        messages += self.cur_messages

+        return messages
+
+    def send_new_user_message(self, inp):
+        self.cur_messages += [
+            dict(role="user", content=inp),
+        ]
+
+        messages = self.format_messages()
+
        if self.verbose:
            utils.show_messages(messages, functions=self.functions)

--- a/aider/coders/editblock_prompts.py
+++ b/aider/coders/editblock_prompts.py
@ -7,56 +7,182 @@ class EditBlockPrompts(CoderPrompts):
    main_system = """Act as an expert software developer.
 Always use best practices when coding.
 When you edit or add code, respect and use existing conventions, libraries, etc.
+Always COMPLETELY IMPLEMENT the needed code.

 Take requests for changes to the supplied code.
 If the request is ambiguous, ask questions.

 Once you understand the request you MUST:
-1. List the files you need to modify. *NEVER* suggest changes to a *read-only* file. Instead, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
-2. Think step-by-step and explain the needed changes.
-3. Describe each change with a *SEARCH/REPLACE block* per the example below.
-"""
+1. List the files you need to modify. Only suggest changes to a *read-write* files. Before changing *read-only* files you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
+2. Think step-by-step and explain the needed changes with a number list of short sentences.
+3. Describe each change with a *SEARCH/REPLACE block* per the examples below.

-    system_reminder = """You MUST use a *SEARCH/REPLACE block* to modify the source file:
+All changes to files must use this *SEARCH/REPLACE block* format.

+# Example conversation 1:
+
+## USER: These are the *read-write* files:
+
+mathweb/flask/app.py
+{fence[0]}python
+from flask import Flask
+
+app = Flask(__name__)
+
+@app.route('/div/<int:n>/<int:d>')
+def divide(n, d):
+    if d == 0:
+        return 'Error: Division by zero'
+    else:
+        result = n / d
+        return str(result)
+
+def factorial(n):
+    "compute factorial"
+
+    if n == 0:
+        return 1
+    else:
+        return n * factorial(n-1)
+
+@app.route('/factorial/<int:n>')
+def get_factorial(n):
+    return str(factorial(n))
+
+if __name__ == '__main__':
+    print("Starting...")
+    app.run()
+{fence[1]}
+
+## ASSISTANT: Ok.
+
+## USER: Change get_factorial() to use math.factorial
+
+## ASSISTANT:
+
+Do make this change we need to modify `mathweb/flask/app.py` to:
+
+1. Import the math package.
+2. Remove the existing factorial() function.
+3. Update get_factorial() to call math.factorial instead.
+
+Here are the *SEARCH/REPLACE* blocks:
+
+mathweb/flask/app.py
 {fence[0]}python
-some/dir/example.py
 <<<<<<< SEARCH
-    # Multiplication function
-    def multiply(a,b)
-        "multiply 2 numbers"
-
-        return a*b
+from flask import Flask
 =======
-    # Addition function
-    def add(a,b):
-        "add 2 numbers"
-
-        return a+b
+import math
+from flask import Flask
 >>>>>>> REPLACE
 {fence[1]}

-The *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character.
-The *SEARCH/REPLACE block* must be concise.
-Include just enough lines to uniquely specify the change.
-Don't include extra unchanging lines.
+mathweb/flask/app.py
+{fence[0]}python
+<<<<<<< SEARCH
+def factorial(n):
+    "compute factorial"

-Every *SEARCH/REPLACE block* must be fenced with {fence[0]} and {fence[1]}, with the correct code language.
+    if n == 0:
+        return 1
+    else:
+        return n * factorial(n-1)

-Every *SEARCH/REPLACE block* must start with the full path!
-NEVER try to *SEARCH/REPLACE* any *read-only* files.
+=======
+>>>>>>> REPLACE
+{fence[1]}
+
+mathweb/flask/app.py
+{fence[0]}python
+<<<<<<< SEARCH
+    return str(factorial(n))
+=======
+    return str(math.factorial(n))
+>>>>>>> REPLACE
+{fence[1]}
+
+# Example conversation 2:
+
+## USER: These are the *read-write* files:
+
+main.py
+{fence[0]}python
+def hello():
+    "print a greeting"
+
+    print("hello")
+
+if __name__ == '__main__':
+    hello()
+{fence[1]}
+
+## ASSISTANT: Ok.
+
+## USER: Refactor hello() into its own file.
+
+## ASSISTANT:
+
+Do make this change we need to modify `main.py` and make a new file `hello.py`:
+
+1. Make a new hello.py file with hello() in it.
+2. Remove hello() from main.py and replace it with an import.
+
+Here are the *SEARCH/REPLACE* blocks:
+
+hello.py
+{fence[0]}python
+<<<<<<< SEARCH
+=======
+def hello():
+    "print a greeting"
+
+    print("hello")
+>>>>>>> REPLACE
+{fence[1]}
+
+main.py
+{fence[0]}python
+<<<<<<< SEARCH
+def hello():
+    "print a greeting"
+
+    print("hello")
+=======
+from hello import hello
+>>>>>>> REPLACE
+{fence[1]}
+
+# Rules
+"""
+
+    system_reminder = """Every *SEARCH/REPLACE block* must use this format:
+1. The file path alone on a line, eg: main.py
+2. The opening fence and code language, eg: {fence[0]}python
+3. The start of search block: <<<<<<< SEARCH
+4. A contiguous chunk of lines to search for in the existing source code
+5. The dividing line: =======
+6. The lines to replace into the source code
+7. The end of the replace block: >>>>>>> REPLACE
+8. The closing fence: {fence[1]}
+
+Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
+
+Include *ALL* the code being searched and replaced!
+
+Only *SEARCH/REPLACE* files that are *read-write*.

 If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
 - A new file path, including dir name if needed
 - An empty `SEARCH` section
- The new file's contents in the `updated` section
+- The new file's contents in the `REPLACE` section
 """

    files_content_prefix = "These are the *read-write* files:\n"

    files_no_full_files = "I am not sharing any *read-write* files yet."

-    repo_content_prefix = """Below here are summaries of other files!
+    repo_content_prefix = """Below here are summaries of other files present in this git repository.
 Do not propose changes to these files, they are *read-only*.
-To make a file *read-write*, ask me to *add it to the chat*.
+To make a file *read-write*, ask the user to *add it to the chat*.
 """
--- a/aider/commands.py
+++ b/aider/commands.py
@ -1,4 +1,3 @@
-import json
 import re
 import subprocess
 import sys
@ -104,20 +103,27 @@ class Commands:

        res = []

+        self.coder.choose_fence()
+
        # system messages
+        main_sys = self.coder.fmt_system_prompt(self.coder.gpt_prompts.main_system)
+        main_sys += "\n" + self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder)
        msgs = [
-            dict(role="system", content=self.coder.gpt_prompts.main_system),
-            dict(role="system", content=self.coder.gpt_prompts.system_reminder),
+            dict(role="system", content=main_sys),
+            dict(
+                role="system",
+                content=self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder),
+            ),
        ]
-        tokens = len(self.tokenizer.encode(json.dumps(msgs)))
+
+        tokens = self.coder.main_model.token_count(msgs)
        res.append((tokens, "system messages", ""))

        # chat history
        msgs = self.coder.done_messages + self.coder.cur_messages
        if msgs:
            msgs = [dict(role="dummy", content=msg) for msg in msgs]
-            msgs = json.dumps(msgs)
-            tokens = len(self.tokenizer.encode(msgs))
+            tokens = self.coder.main_model.token_count(msgs)
            res.append((tokens, "chat history", "use /clear to clear"))

        # repo map
@ -125,7 +131,7 @@ class Commands:
        if self.coder.repo_map:
            repo_content = self.coder.repo_map.get_repo_map(self.coder.abs_fnames, other_files)
            if repo_content:
-                tokens = len(self.tokenizer.encode(repo_content))
+                tokens = self.coder.main_model.token_count(repo_content)
                res.append((tokens, "repository map", "use --map-tokens to resize"))

        # files
@ -134,7 +140,7 @@ class Commands:
            content = self.io.read_text(fname)
            # approximate
            content = f"{relative_fname}\n```\n" + content + "```\n"
-            tokens = len(self.tokenizer.encode(content))
+            tokens = self.coder.main_model.token_count(content)
            res.append((tokens, f"{relative_fname}", "use /drop to drop from chat"))

        self.io.tool_output("Approximate context window usage, in tokens:")
--- a/aider/models/model.py
+++ b/aider/models/model.py
@ -1,3 +1,5 @@
+import json
+
 import openai


@ -37,3 +39,14 @@ class Model:
    @staticmethod
    def commit_message_models():
        return [Model.create("gpt-3.5-turbo"), Model.create("gpt-3.5-turbo-16k")]
+
+    def token_count(self, messages):
+        if not self.tokenizer:
+            return
+
+        if type(messages) is str:
+            msgs = messages
+        else:
+            msgs = json.dumps(messages)
+
+        return len(self.tokenizer.encode(msgs))
--- a/tests/test_main.py
+++ b/tests/test_main.py
@ -113,20 +113,23 @@ class TestMain(TestCase):
        self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])

    def test_check_gitignore(self):
-        make_repo()
-        io = InputOutput(pretty=False, yes=True)
-        cwd = Path.cwd()
-        gitignore = cwd / ".gitignore"
+        with tempfile.NamedTemporaryFile() as temp_gitconfig:
+            os.environ['GIT_CONFIG_GLOBAL'] = temp_gitconfig.name
+            make_repo()
+            io = InputOutput(pretty=False, yes=True)
+            cwd = Path.cwd()
+            gitignore = cwd / ".gitignore"

-        self.assertFalse(gitignore.exists())
-        check_gitignore(cwd, io)
-        self.assertTrue(gitignore.exists())
+            self.assertFalse(gitignore.exists())
+            check_gitignore(cwd, io)
+            self.assertTrue(gitignore.exists())

-        self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
+            self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])

-        gitignore.write_text("one\ntwo\n")
-        check_gitignore(cwd, io)
-        self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
+            gitignore.write_text("one\ntwo\n")
+            check_gitignore(cwd, io)
+            self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
+            del os.environ['GIT_CONFIG_GLOBAL']

    def test_main_git_ignore(self):
        cwd = Path().cwd()