Improved prompting for both GPT-4 and GPT-4 Turbo:

- Uses 2-shot examples, which highlight the need to:
  - Use multiple SEARCH/REPLACE blocks to make changes spread across multiple locations in a file (rather than one "big" S/R block that uses `...` to elide unchanging chunks of code).
  - Include comments, docstrings, etc in the S/R blocks.
- Updated list of explicit "rules" for constructing S/R blocks.

For `gpt-4-1106-preview` the new prompts reduced `num_errors` from 25-30 to 12-14, indicating improved ability to generate proper SEARCH/REPLACE blocks. Benchmark results of 54%/65% are stable or perhaps slightly better than with the previous prompts.

For `gpt-4-0613` the new prompts produce a major improvement in benchmark results. Performance leaps from 47%/64% up to 51%/71%. This now makes `aider + gpt-4-0613` better at coding than `aider + gpt-4-1106-preview` after the 2nd try, and competitive after the 1st try.

benchmarks/2023-11-14-19-34-51--preview-prompting-positivity:

test-cases: 133
model: gpt-4-1106-preview
edit_format: diff
commit_hash: 9cbe114
num_error_outputs: 14
num_user_asks: 0
num_exhausted_context_windows: 0
test_timeouts: 2

53.4% correct after try 0
64.7% correct after try 1

duration: 23.6 sec/test-case
costs: $0.0495/test-case, $6.58 total, $6.58 projected

benchmarks/2023-11-14-19-55-31--preview-prompt-against-0613

test-cases: 133
model: gpt-4-0613
edit_format: diff
commit_hash: 9cbe114
num_error_outputs: 10
num_user_asks: 0
num_exhausted_context_windows: 0
test_timeouts: 1

51.1% correct after try 0
70.7% correct after try 1

duration: 52.5 sec/test-case
costs: $0.1229/test-case, $16.34 total, $16.34 projected
This commit is contained in:
Paul Gauthier 2023-11-14 13:11:31 -08:00
commit 0f9053008a
5 changed files with 225 additions and 60 deletions

View file

@ -316,10 +316,6 @@ class Coder:
dict(role="user", content=all_content),
dict(role="assistant", content="Ok."),
]
if self.abs_fnames:
files_messages += [
dict(role="system", content=self.fmt_system_reminder()),
]
return files_messages
@ -415,21 +411,14 @@ class Coder:
return self.send_new_user_message(inp)
def fmt_system_reminder(self):
prompt = self.gpt_prompts.system_reminder
def fmt_system_prompt(self, prompt):
prompt = prompt.format(fence=self.fence)
return prompt
def send_new_user_message(self, inp):
def format_messages(self):
self.choose_fence()
self.cur_messages += [
dict(role="user", content=inp),
]
main_sys = self.gpt_prompts.main_system
# if self.main_model.max_context_tokens > 4 * 1024:
main_sys += "\n" + self.fmt_system_reminder()
main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system)
main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder)
messages = [
dict(role="system", content=main_sys),
@ -438,8 +427,36 @@ class Coder:
self.summarize_end()
messages += self.done_messages
messages += self.get_files_messages()
reminder_message = [
dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)),
]
messages_tokens = self.main_model.token_count(messages)
reminder_tokens = self.main_model.token_count(reminder_message)
cur_tokens = self.main_model.token_count(self.cur_messages)
if None not in (messages_tokens, reminder_tokens, cur_tokens):
total_tokens = messages_tokens + reminder_tokens + cur_tokens
else:
# add the reminder anyway
total_tokens = 0
# Add the reminder prompt if we still have room to include it.
if total_tokens < self.main_model.max_context_tokens:
messages += reminder_message
messages += self.cur_messages
return messages
def send_new_user_message(self, inp):
self.cur_messages += [
dict(role="user", content=inp),
]
messages = self.format_messages()
if self.verbose:
utils.show_messages(messages, functions=self.functions)

View file

@ -7,56 +7,182 @@ class EditBlockPrompts(CoderPrompts):
main_system = """Act as an expert software developer.
Always use best practices when coding.
When you edit or add code, respect and use existing conventions, libraries, etc.
Always COMPLETELY IMPLEMENT the needed code.
Take requests for changes to the supplied code.
If the request is ambiguous, ask questions.
Once you understand the request you MUST:
1. List the files you need to modify. *NEVER* suggest changes to a *read-only* file. Instead, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
2. Think step-by-step and explain the needed changes.
3. Describe each change with a *SEARCH/REPLACE block* per the example below.
"""
1. List the files you need to modify. Only suggest changes to a *read-write* files. Before changing *read-only* files you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
2. Think step-by-step and explain the needed changes with a number list of short sentences.
3. Describe each change with a *SEARCH/REPLACE block* per the examples below.
system_reminder = """You MUST use a *SEARCH/REPLACE block* to modify the source file:
All changes to files must use this *SEARCH/REPLACE block* format.
# Example conversation 1:
## USER: These are the *read-write* files:
mathweb/flask/app.py
{fence[0]}python
from flask import Flask
app = Flask(__name__)
@app.route('/div/<int:n>/<int:d>')
def divide(n, d):
if d == 0:
return 'Error: Division by zero'
else:
result = n / d
return str(result)
def factorial(n):
"compute factorial"
if n == 0:
return 1
else:
return n * factorial(n-1)
@app.route('/factorial/<int:n>')
def get_factorial(n):
return str(factorial(n))
if __name__ == '__main__':
print("Starting...")
app.run()
{fence[1]}
## ASSISTANT: Ok.
## USER: Change get_factorial() to use math.factorial
## ASSISTANT:
Do make this change we need to modify `mathweb/flask/app.py` to:
1. Import the math package.
2. Remove the existing factorial() function.
3. Update get_factorial() to call math.factorial instead.
Here are the *SEARCH/REPLACE* blocks:
mathweb/flask/app.py
{fence[0]}python
some/dir/example.py
<<<<<<< SEARCH
# Multiplication function
def multiply(a,b)
"multiply 2 numbers"
return a*b
from flask import Flask
=======
# Addition function
def add(a,b):
"add 2 numbers"
return a+b
import math
from flask import Flask
>>>>>>> REPLACE
{fence[1]}
The *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character.
The *SEARCH/REPLACE block* must be concise.
Include just enough lines to uniquely specify the change.
Don't include extra unchanging lines.
mathweb/flask/app.py
{fence[0]}python
<<<<<<< SEARCH
def factorial(n):
"compute factorial"
Every *SEARCH/REPLACE block* must be fenced with {fence[0]} and {fence[1]}, with the correct code language.
if n == 0:
return 1
else:
return n * factorial(n-1)
Every *SEARCH/REPLACE block* must start with the full path!
NEVER try to *SEARCH/REPLACE* any *read-only* files.
=======
>>>>>>> REPLACE
{fence[1]}
mathweb/flask/app.py
{fence[0]}python
<<<<<<< SEARCH
return str(factorial(n))
=======
return str(math.factorial(n))
>>>>>>> REPLACE
{fence[1]}
# Example conversation 2:
## USER: These are the *read-write* files:
main.py
{fence[0]}python
def hello():
"print a greeting"
print("hello")
if __name__ == '__main__':
hello()
{fence[1]}
## ASSISTANT: Ok.
## USER: Refactor hello() into its own file.
## ASSISTANT:
Do make this change we need to modify `main.py` and make a new file `hello.py`:
1. Make a new hello.py file with hello() in it.
2. Remove hello() from main.py and replace it with an import.
Here are the *SEARCH/REPLACE* blocks:
hello.py
{fence[0]}python
<<<<<<< SEARCH
=======
def hello():
"print a greeting"
print("hello")
>>>>>>> REPLACE
{fence[1]}
main.py
{fence[0]}python
<<<<<<< SEARCH
def hello():
"print a greeting"
print("hello")
=======
from hello import hello
>>>>>>> REPLACE
{fence[1]}
# Rules
"""
system_reminder = """Every *SEARCH/REPLACE block* must use this format:
1. The file path alone on a line, eg: main.py
2. The opening fence and code language, eg: {fence[0]}python
3. The start of search block: <<<<<<< SEARCH
4. A contiguous chunk of lines to search for in the existing source code
5. The dividing line: =======
6. The lines to replace into the source code
7. The end of the replace block: >>>>>>> REPLACE
8. The closing fence: {fence[1]}
Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
Include *ALL* the code being searched and replaced!
Only *SEARCH/REPLACE* files that are *read-write*.
If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
- A new file path, including dir name if needed
- An empty `SEARCH` section
- The new file's contents in the `updated` section
- The new file's contents in the `REPLACE` section
"""
files_content_prefix = "These are the *read-write* files:\n"
files_no_full_files = "I am not sharing any *read-write* files yet."
repo_content_prefix = """Below here are summaries of other files!
repo_content_prefix = """Below here are summaries of other files present in this git repository.
Do not propose changes to these files, they are *read-only*.
To make a file *read-write*, ask me to *add it to the chat*.
To make a file *read-write*, ask the user to *add it to the chat*.
"""

View file

@ -1,4 +1,3 @@
import json
import re
import subprocess
import sys
@ -104,20 +103,27 @@ class Commands:
res = []
self.coder.choose_fence()
# system messages
main_sys = self.coder.fmt_system_prompt(self.coder.gpt_prompts.main_system)
main_sys += "\n" + self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder)
msgs = [
dict(role="system", content=self.coder.gpt_prompts.main_system),
dict(role="system", content=self.coder.gpt_prompts.system_reminder),
dict(role="system", content=main_sys),
dict(
role="system",
content=self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder),
),
]
tokens = len(self.tokenizer.encode(json.dumps(msgs)))
tokens = self.coder.main_model.token_count(msgs)
res.append((tokens, "system messages", ""))
# chat history
msgs = self.coder.done_messages + self.coder.cur_messages
if msgs:
msgs = [dict(role="dummy", content=msg) for msg in msgs]
msgs = json.dumps(msgs)
tokens = len(self.tokenizer.encode(msgs))
tokens = self.coder.main_model.token_count(msgs)
res.append((tokens, "chat history", "use /clear to clear"))
# repo map
@ -125,7 +131,7 @@ class Commands:
if self.coder.repo_map:
repo_content = self.coder.repo_map.get_repo_map(self.coder.abs_fnames, other_files)
if repo_content:
tokens = len(self.tokenizer.encode(repo_content))
tokens = self.coder.main_model.token_count(repo_content)
res.append((tokens, "repository map", "use --map-tokens to resize"))
# files
@ -134,7 +140,7 @@ class Commands:
content = self.io.read_text(fname)
# approximate
content = f"{relative_fname}\n```\n" + content + "```\n"
tokens = len(self.tokenizer.encode(content))
tokens = self.coder.main_model.token_count(content)
res.append((tokens, f"{relative_fname}", "use /drop to drop from chat"))
self.io.tool_output("Approximate context window usage, in tokens:")

View file

@ -1,3 +1,5 @@
import json
import openai
@ -37,3 +39,14 @@ class Model:
@staticmethod
def commit_message_models():
return [Model.create("gpt-3.5-turbo"), Model.create("gpt-3.5-turbo-16k")]
def token_count(self, messages):
    """Count the tokens in `messages` using this model's tokenizer.

    Args:
        messages: Either a string, which is tokenized as-is, or any
            JSON-serializable object (e.g. a list of chat message dicts),
            which is serialized with `json.dumps` before being tokenized.

    Returns:
        The number of tokens produced by `self.tokenizer.encode`, or None
        when `self.tokenizer` is falsy (no tokenizer available), which
        signals that the count is unknown.
    """
    if not self.tokenizer:
        return None

    # Use isinstance rather than an exact type comparison so that str
    # subclasses are counted as plain text instead of being JSON-quoted.
    text = messages if isinstance(messages, str) else json.dumps(messages)
    return len(self.tokenizer.encode(text))

View file

@ -113,20 +113,23 @@ class TestMain(TestCase):
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
def test_check_gitignore(self):
make_repo()
io = InputOutput(pretty=False, yes=True)
cwd = Path.cwd()
gitignore = cwd / ".gitignore"
with tempfile.NamedTemporaryFile() as temp_gitconfig:
os.environ['GIT_CONFIG_GLOBAL'] = temp_gitconfig.name
make_repo()
io = InputOutput(pretty=False, yes=True)
cwd = Path.cwd()
gitignore = cwd / ".gitignore"
self.assertFalse(gitignore.exists())
check_gitignore(cwd, io)
self.assertTrue(gitignore.exists())
self.assertFalse(gitignore.exists())
check_gitignore(cwd, io)
self.assertTrue(gitignore.exists())
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
gitignore.write_text("one\ntwo\n")
check_gitignore(cwd, io)
self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
gitignore.write_text("one\ntwo\n")
check_gitignore(cwd, io)
self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
del os.environ['GIT_CONFIG_GLOBAL']
def test_main_git_ignore(self):
cwd = Path().cwd()