mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-01 02:05:00 +00:00
Improved prompting for both GPT-4 and GPT-4 Turbo:
- Uses 2-shot examples, which highlight the need to: - Use multiple SEARCH/REPLACE blocks to make changes spread across multiple locations in a file (rather than one "big" S/R block that uses "..." to elide unchanging chunks of code). - Include comments, docstrings, etc. in the S/R blocks. - Updated list of explicit "rules" for constructing S/R blocks. For `gpt-4-1106-preview` the new prompts reduced `num_errors` from 25-30 -> 12-14, indicating improved ability to generate proper SEARCH/REPLACE blocks. Benchmark results of 54%/65% are stable or perhaps slightly better than the previous prompts. For `gpt-4-0613` the new prompts produce a major improvement in benchmark results. Performance leaps from 47%/64% up to 51%/71%. This now makes `aider + gpt-4-0613` better at coding than `aider + gpt-4-1106-preview` after the 2nd try, and competitive after the 1st try. benchmarks/2023-11-14-19-34-51--preview-prompting-positivity: test-cases: 133 model: gpt-4-1106-preview edit_format: diff commit_hash:9cbe114
num_error_outputs: 14 num_user_asks: 0 num_exhausted_context_windows 0 test_timeouts: 2 53.4% correct after try 0 64.7% correct after try 1 duration: 23.6 sec/test-case costs: $0.0495/test-case, $6.58 total, $6.58 projected benchmarks/2023-11-14-19-55-31--preview-prompt-against-0613 test-cases: 133 model: gpt-4-0613 edit_format: diff commit_hash:9cbe114
num_error_outputs: 10 num_user_asks: 0 num_exhausted_context_windows 0 test_timeouts: 1 51.1% correct after try 0 70.7% correct after try 1 duration: 52.5 sec/test-case costs: $0.1229/test-case, $16.34 total, $16.34 projected
This commit is contained in:
commit
0f9053008a
5 changed files with 225 additions and 60 deletions
|
@ -316,10 +316,6 @@ class Coder:
|
||||||
dict(role="user", content=all_content),
|
dict(role="user", content=all_content),
|
||||||
dict(role="assistant", content="Ok."),
|
dict(role="assistant", content="Ok."),
|
||||||
]
|
]
|
||||||
if self.abs_fnames:
|
|
||||||
files_messages += [
|
|
||||||
dict(role="system", content=self.fmt_system_reminder()),
|
|
||||||
]
|
|
||||||
|
|
||||||
return files_messages
|
return files_messages
|
||||||
|
|
||||||
|
@ -415,21 +411,14 @@ class Coder:
|
||||||
|
|
||||||
return self.send_new_user_message(inp)
|
return self.send_new_user_message(inp)
|
||||||
|
|
||||||
def fmt_system_reminder(self):
|
def fmt_system_prompt(self, prompt):
|
||||||
prompt = self.gpt_prompts.system_reminder
|
|
||||||
prompt = prompt.format(fence=self.fence)
|
prompt = prompt.format(fence=self.fence)
|
||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
def send_new_user_message(self, inp):
|
def format_messages(self):
|
||||||
self.choose_fence()
|
self.choose_fence()
|
||||||
|
main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system)
|
||||||
self.cur_messages += [
|
main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder)
|
||||||
dict(role="user", content=inp),
|
|
||||||
]
|
|
||||||
|
|
||||||
main_sys = self.gpt_prompts.main_system
|
|
||||||
# if self.main_model.max_context_tokens > 4 * 1024:
|
|
||||||
main_sys += "\n" + self.fmt_system_reminder()
|
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
dict(role="system", content=main_sys),
|
dict(role="system", content=main_sys),
|
||||||
|
@ -438,8 +427,36 @@ class Coder:
|
||||||
self.summarize_end()
|
self.summarize_end()
|
||||||
messages += self.done_messages
|
messages += self.done_messages
|
||||||
messages += self.get_files_messages()
|
messages += self.get_files_messages()
|
||||||
|
|
||||||
|
reminder_message = [
|
||||||
|
dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)),
|
||||||
|
]
|
||||||
|
|
||||||
|
messages_tokens = self.main_model.token_count(messages)
|
||||||
|
reminder_tokens = self.main_model.token_count(reminder_message)
|
||||||
|
cur_tokens = self.main_model.token_count(self.cur_messages)
|
||||||
|
|
||||||
|
if None not in (messages_tokens, reminder_tokens, cur_tokens):
|
||||||
|
total_tokens = messages_tokens + reminder_tokens + cur_tokens
|
||||||
|
else:
|
||||||
|
# add the reminder anyway
|
||||||
|
total_tokens = 0
|
||||||
|
|
||||||
|
# Add the reminder prompt if we still have room to include it.
|
||||||
|
if total_tokens < self.main_model.max_context_tokens:
|
||||||
|
messages += reminder_message
|
||||||
|
|
||||||
messages += self.cur_messages
|
messages += self.cur_messages
|
||||||
|
|
||||||
|
return messages
|
||||||
|
|
||||||
|
def send_new_user_message(self, inp):
|
||||||
|
self.cur_messages += [
|
||||||
|
dict(role="user", content=inp),
|
||||||
|
]
|
||||||
|
|
||||||
|
messages = self.format_messages()
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
utils.show_messages(messages, functions=self.functions)
|
utils.show_messages(messages, functions=self.functions)
|
||||||
|
|
||||||
|
|
|
@ -7,56 +7,182 @@ class EditBlockPrompts(CoderPrompts):
|
||||||
main_system = """Act as an expert software developer.
|
main_system = """Act as an expert software developer.
|
||||||
Always use best practices when coding.
|
Always use best practices when coding.
|
||||||
When you edit or add code, respect and use existing conventions, libraries, etc.
|
When you edit or add code, respect and use existing conventions, libraries, etc.
|
||||||
|
Always COMPLETELY IMPLEMENT the needed code.
|
||||||
|
|
||||||
Take requests for changes to the supplied code.
|
Take requests for changes to the supplied code.
|
||||||
If the request is ambiguous, ask questions.
|
If the request is ambiguous, ask questions.
|
||||||
|
|
||||||
Once you understand the request you MUST:
|
Once you understand the request you MUST:
|
||||||
1. List the files you need to modify. *NEVER* suggest changes to a *read-only* file. Instead, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
|
1. List the files you need to modify. Only suggest changes to a *read-write* files. Before changing *read-only* files you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval.
|
||||||
2. Think step-by-step and explain the needed changes.
|
2. Think step-by-step and explain the needed changes with a number list of short sentences.
|
||||||
3. Describe each change with a *SEARCH/REPLACE block* per the example below.
|
3. Describe each change with a *SEARCH/REPLACE block* per the examples below.
|
||||||
"""
|
|
||||||
|
|
||||||
system_reminder = """You MUST use a *SEARCH/REPLACE block* to modify the source file:
|
All changes to files must use this *SEARCH/REPLACE block* format.
|
||||||
|
|
||||||
|
# Example conversation 1:
|
||||||
|
|
||||||
|
## USER: These are the *read-write* files:
|
||||||
|
|
||||||
|
mathweb/flask/app.py
|
||||||
|
{fence[0]}python
|
||||||
|
from flask import Flask
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route('/div/<int:n>/<int:d>')
|
||||||
|
def divide(n, d):
|
||||||
|
if d == 0:
|
||||||
|
return 'Error: Division by zero'
|
||||||
|
else:
|
||||||
|
result = n / d
|
||||||
|
return str(result)
|
||||||
|
|
||||||
|
def factorial(n):
|
||||||
|
"compute factorial"
|
||||||
|
|
||||||
|
if n == 0:
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
return n * factorial(n-1)
|
||||||
|
|
||||||
|
@app.route('/factorial/<int:n>')
|
||||||
|
def get_factorial(n):
|
||||||
|
return str(factorial(n))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
print("Starting...")
|
||||||
|
app.run()
|
||||||
|
{fence[1]}
|
||||||
|
|
||||||
|
## ASSISTANT: Ok.
|
||||||
|
|
||||||
|
## USER: Change get_factorial() to use math.factorial
|
||||||
|
|
||||||
|
## ASSISTANT:
|
||||||
|
|
||||||
|
Do make this change we need to modify `mathweb/flask/app.py` to:
|
||||||
|
|
||||||
|
1. Import the math package.
|
||||||
|
2. Remove the existing factorial() function.
|
||||||
|
3. Update get_factorial() to call math.factorial instead.
|
||||||
|
|
||||||
|
Here are the *SEARCH/REPLACE* blocks:
|
||||||
|
|
||||||
|
mathweb/flask/app.py
|
||||||
{fence[0]}python
|
{fence[0]}python
|
||||||
some/dir/example.py
|
|
||||||
<<<<<<< SEARCH
|
<<<<<<< SEARCH
|
||||||
# Multiplication function
|
from flask import Flask
|
||||||
def multiply(a,b)
|
|
||||||
"multiply 2 numbers"
|
|
||||||
|
|
||||||
return a*b
|
|
||||||
=======
|
=======
|
||||||
# Addition function
|
import math
|
||||||
def add(a,b):
|
from flask import Flask
|
||||||
"add 2 numbers"
|
|
||||||
|
|
||||||
return a+b
|
|
||||||
>>>>>>> REPLACE
|
>>>>>>> REPLACE
|
||||||
{fence[1]}
|
{fence[1]}
|
||||||
|
|
||||||
The *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character.
|
mathweb/flask/app.py
|
||||||
The *SEARCH/REPLACE block* must be concise.
|
{fence[0]}python
|
||||||
Include just enough lines to uniquely specify the change.
|
<<<<<<< SEARCH
|
||||||
Don't include extra unchanging lines.
|
def factorial(n):
|
||||||
|
"compute factorial"
|
||||||
|
|
||||||
Every *SEARCH/REPLACE block* must be fenced with {fence[0]} and {fence[1]}, with the correct code language.
|
if n == 0:
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
return n * factorial(n-1)
|
||||||
|
|
||||||
Every *SEARCH/REPLACE block* must start with the full path!
|
=======
|
||||||
NEVER try to *SEARCH/REPLACE* any *read-only* files.
|
>>>>>>> REPLACE
|
||||||
|
{fence[1]}
|
||||||
|
|
||||||
|
mathweb/flask/app.py
|
||||||
|
{fence[0]}python
|
||||||
|
<<<<<<< SEARCH
|
||||||
|
return str(factorial(n))
|
||||||
|
=======
|
||||||
|
return str(math.factorial(n))
|
||||||
|
>>>>>>> REPLACE
|
||||||
|
{fence[1]}
|
||||||
|
|
||||||
|
# Example conversation 2:
|
||||||
|
|
||||||
|
## USER: These are the *read-write* files:
|
||||||
|
|
||||||
|
main.py
|
||||||
|
{fence[0]}python
|
||||||
|
def hello():
|
||||||
|
"print a greeting"
|
||||||
|
|
||||||
|
print("hello")
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
hello()
|
||||||
|
{fence[1]}
|
||||||
|
|
||||||
|
## ASSISTANT: Ok.
|
||||||
|
|
||||||
|
## USER: Refactor hello() into its own file.
|
||||||
|
|
||||||
|
## ASSISTANT:
|
||||||
|
|
||||||
|
Do make this change we need to modify `main.py` and make a new file `hello.py`:
|
||||||
|
|
||||||
|
1. Make a new hello.py file with hello() in it.
|
||||||
|
2. Remove hello() from main.py and replace it with an import.
|
||||||
|
|
||||||
|
Here are the *SEARCH/REPLACE* blocks:
|
||||||
|
|
||||||
|
hello.py
|
||||||
|
{fence[0]}python
|
||||||
|
<<<<<<< SEARCH
|
||||||
|
=======
|
||||||
|
def hello():
|
||||||
|
"print a greeting"
|
||||||
|
|
||||||
|
print("hello")
|
||||||
|
>>>>>>> REPLACE
|
||||||
|
{fence[1]}
|
||||||
|
|
||||||
|
main.py
|
||||||
|
{fence[0]}python
|
||||||
|
<<<<<<< SEARCH
|
||||||
|
def hello():
|
||||||
|
"print a greeting"
|
||||||
|
|
||||||
|
print("hello")
|
||||||
|
=======
|
||||||
|
from hello import hello
|
||||||
|
>>>>>>> REPLACE
|
||||||
|
{fence[1]}
|
||||||
|
|
||||||
|
# Rules
|
||||||
|
"""
|
||||||
|
|
||||||
|
system_reminder = """Every *SEARCH/REPLACE block* must use this format:
|
||||||
|
1. The file path alone on a line, eg: main.py
|
||||||
|
2. The opening fence and code language, eg: {fence[0]}python
|
||||||
|
3. The start of search block: <<<<<<< SEARCH
|
||||||
|
4. A contiguous chunk of lines to search for in the existing source code
|
||||||
|
5. The dividing line: =======
|
||||||
|
6. The lines to replace into the source code
|
||||||
|
7. The end of the replace block: >>>>>>> REPLACE
|
||||||
|
8. The closing fence: {fence[1]}
|
||||||
|
|
||||||
|
Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
|
||||||
|
|
||||||
|
Include *ALL* the code being searched and replaced!
|
||||||
|
|
||||||
|
Only *SEARCH/REPLACE* files that are *read-write*.
|
||||||
|
|
||||||
If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
|
If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
|
||||||
- A new file path, including dir name if needed
|
- A new file path, including dir name if needed
|
||||||
- An empty `SEARCH` section
|
- An empty `SEARCH` section
|
||||||
- The new file's contents in the `updated` section
|
- The new file's contents in the `REPLACE` section
|
||||||
"""
|
"""
|
||||||
|
|
||||||
files_content_prefix = "These are the *read-write* files:\n"
|
files_content_prefix = "These are the *read-write* files:\n"
|
||||||
|
|
||||||
files_no_full_files = "I am not sharing any *read-write* files yet."
|
files_no_full_files = "I am not sharing any *read-write* files yet."
|
||||||
|
|
||||||
repo_content_prefix = """Below here are summaries of other files!
|
repo_content_prefix = """Below here are summaries of other files present in this git repository.
|
||||||
Do not propose changes to these files, they are *read-only*.
|
Do not propose changes to these files, they are *read-only*.
|
||||||
To make a file *read-write*, ask me to *add it to the chat*.
|
To make a file *read-write*, ask the user to *add it to the chat*.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
@ -104,20 +103,27 @@ class Commands:
|
||||||
|
|
||||||
res = []
|
res = []
|
||||||
|
|
||||||
|
self.coder.choose_fence()
|
||||||
|
|
||||||
# system messages
|
# system messages
|
||||||
|
main_sys = self.coder.fmt_system_prompt(self.coder.gpt_prompts.main_system)
|
||||||
|
main_sys += "\n" + self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder)
|
||||||
msgs = [
|
msgs = [
|
||||||
dict(role="system", content=self.coder.gpt_prompts.main_system),
|
dict(role="system", content=main_sys),
|
||||||
dict(role="system", content=self.coder.gpt_prompts.system_reminder),
|
dict(
|
||||||
|
role="system",
|
||||||
|
content=self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder),
|
||||||
|
),
|
||||||
]
|
]
|
||||||
tokens = len(self.tokenizer.encode(json.dumps(msgs)))
|
|
||||||
|
tokens = self.coder.main_model.token_count(msgs)
|
||||||
res.append((tokens, "system messages", ""))
|
res.append((tokens, "system messages", ""))
|
||||||
|
|
||||||
# chat history
|
# chat history
|
||||||
msgs = self.coder.done_messages + self.coder.cur_messages
|
msgs = self.coder.done_messages + self.coder.cur_messages
|
||||||
if msgs:
|
if msgs:
|
||||||
msgs = [dict(role="dummy", content=msg) for msg in msgs]
|
msgs = [dict(role="dummy", content=msg) for msg in msgs]
|
||||||
msgs = json.dumps(msgs)
|
tokens = self.coder.main_model.token_count(msgs)
|
||||||
tokens = len(self.tokenizer.encode(msgs))
|
|
||||||
res.append((tokens, "chat history", "use /clear to clear"))
|
res.append((tokens, "chat history", "use /clear to clear"))
|
||||||
|
|
||||||
# repo map
|
# repo map
|
||||||
|
@ -125,7 +131,7 @@ class Commands:
|
||||||
if self.coder.repo_map:
|
if self.coder.repo_map:
|
||||||
repo_content = self.coder.repo_map.get_repo_map(self.coder.abs_fnames, other_files)
|
repo_content = self.coder.repo_map.get_repo_map(self.coder.abs_fnames, other_files)
|
||||||
if repo_content:
|
if repo_content:
|
||||||
tokens = len(self.tokenizer.encode(repo_content))
|
tokens = self.coder.main_model.token_count(repo_content)
|
||||||
res.append((tokens, "repository map", "use --map-tokens to resize"))
|
res.append((tokens, "repository map", "use --map-tokens to resize"))
|
||||||
|
|
||||||
# files
|
# files
|
||||||
|
@ -134,7 +140,7 @@ class Commands:
|
||||||
content = self.io.read_text(fname)
|
content = self.io.read_text(fname)
|
||||||
# approximate
|
# approximate
|
||||||
content = f"{relative_fname}\n```\n" + content + "```\n"
|
content = f"{relative_fname}\n```\n" + content + "```\n"
|
||||||
tokens = len(self.tokenizer.encode(content))
|
tokens = self.coder.main_model.token_count(content)
|
||||||
res.append((tokens, f"{relative_fname}", "use /drop to drop from chat"))
|
res.append((tokens, f"{relative_fname}", "use /drop to drop from chat"))
|
||||||
|
|
||||||
self.io.tool_output("Approximate context window usage, in tokens:")
|
self.io.tool_output("Approximate context window usage, in tokens:")
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
import json
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,3 +39,14 @@ class Model:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def commit_message_models():
|
def commit_message_models():
|
||||||
return [Model.create("gpt-3.5-turbo"), Model.create("gpt-3.5-turbo-16k")]
|
return [Model.create("gpt-3.5-turbo"), Model.create("gpt-3.5-turbo-16k")]
|
||||||
|
|
||||||
|
def token_count(self, messages):
|
||||||
|
if not self.tokenizer:
|
||||||
|
return
|
||||||
|
|
||||||
|
if type(messages) is str:
|
||||||
|
msgs = messages
|
||||||
|
else:
|
||||||
|
msgs = json.dumps(messages)
|
||||||
|
|
||||||
|
return len(self.tokenizer.encode(msgs))
|
||||||
|
|
|
@ -113,20 +113,23 @@ class TestMain(TestCase):
|
||||||
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
|
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
|
||||||
|
|
||||||
def test_check_gitignore(self):
|
def test_check_gitignore(self):
|
||||||
make_repo()
|
with tempfile.NamedTemporaryFile() as temp_gitconfig:
|
||||||
io = InputOutput(pretty=False, yes=True)
|
os.environ['GIT_CONFIG_GLOBAL'] = temp_gitconfig.name
|
||||||
cwd = Path.cwd()
|
make_repo()
|
||||||
gitignore = cwd / ".gitignore"
|
io = InputOutput(pretty=False, yes=True)
|
||||||
|
cwd = Path.cwd()
|
||||||
|
gitignore = cwd / ".gitignore"
|
||||||
|
|
||||||
self.assertFalse(gitignore.exists())
|
self.assertFalse(gitignore.exists())
|
||||||
check_gitignore(cwd, io)
|
check_gitignore(cwd, io)
|
||||||
self.assertTrue(gitignore.exists())
|
self.assertTrue(gitignore.exists())
|
||||||
|
|
||||||
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
|
self.assertEqual(".aider*", gitignore.read_text().splitlines()[0])
|
||||||
|
|
||||||
gitignore.write_text("one\ntwo\n")
|
gitignore.write_text("one\ntwo\n")
|
||||||
check_gitignore(cwd, io)
|
check_gitignore(cwd, io)
|
||||||
self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
|
self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text())
|
||||||
|
del os.environ['GIT_CONFIG_GLOBAL']
|
||||||
|
|
||||||
def test_main_git_ignore(self):
|
def test_main_git_ignore(self):
|
||||||
cwd = Path().cwd()
|
cwd = Path().cwd()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue