From 50f457d7532690079ca3676dc6e8f07fa50d8d57 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Mon, 13 Nov 2023 14:59:19 -0800 Subject: [PATCH 01/16] prompt with clear example using multiple S/R blocks on the same file --- aider/coders/editblock_prompts.py | 92 ++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 14 deletions(-) diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index bbfd5a00a..044d569ac 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -17,27 +17,91 @@ Once you understand the request you MUST: 3. Describe each change with a *SEARCH/REPLACE block* per the example below. """ - system_reminder = """You MUST use a *SEARCH/REPLACE block* to modify the source file: + system_reminder = """# Example +Here is an example conversation: + +## USER: These are the *read-write* files: + +mathweb/flask/app.py +{fence[0]}python +from flask import Flask + +app = Flask(__name__) + +@app.route('/div//') +def divide(n, d): + if d == 0: + return 'Error: Division by zero' + else: + result = n / d + return str(result) + +def factorial(n): + if n == 0: + return 1 + else: + return n * factorial(n-1) + +@app.route('/factorial/') +def get_factorial(n): + return str(factorial(n)) + +if __name__ == '__main__': + print("Starting...") + app.run() +{fence[1]} + +## ASSISTANT: Ok. + +## USER: Change get_factorial() to use math.factorial + +## ASSISTANT: + +Do make this change we need to modify `mathweb/flask/app.py` to: + +1. Import the math package. +2. Remove the existing factorial() function. +3. Update get_factorial() to call math.factorial instead. + +Here are the *SEARCH/REPLACE* blocks: + +mathweb/flask/app.py {fence[0]}python -some/dir/example.py <<<<<<< SEARCH - # Multiplication function - def multiply(a,b) - "multiply 2 numbers" - - return a*b +from flask import Flask ======= - # Addition function - def add(a,b): - "add 2 numbers" - - return a+b +import math +from flask import Flask >>>>>>> REPLACE {fence[1]} -The *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character. -The *SEARCH/REPLACE block* must be concise. +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH +def factorial(n): + if n == 0: + return 1 + else: + return n * factorial(n-1) + +======= +>>>>>>> REPLACE +{fence[1]} + +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH + return str(factorial(n)) +======= + return str(math.factorial(n)) +>>>>>>> REPLACE +{fence[1]} + +# Rules + +Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character. +Every *SEARCH/REPLACE block* must be concise. Include just enough lines to uniquely specify the change. Don't include extra unchanging lines. From 8a3d4bf14c42355ca940a3de2903c050724d47ec Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Mon, 13 Nov 2023 15:07:15 -0800 Subject: [PATCH 02/16] added second example showing new file --- aider/coders/editblock_prompts.py | 49 +++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index 044d569ac..553239202 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -17,9 +17,7 @@ Once you understand the request you MUST: 3. Describe each change with a *SEARCH/REPLACE block* per the example below. """ - system_reminder = """# Example - -Here is an example conversation: + system_reminder = """# Example conversation 1: ## USER: These are the *read-write* files: @@ -98,6 +96,51 @@ mathweb/flask/app.py >>>>>>> REPLACE {fence[1]} +# Example conversation 2: + +## USER: These are the *read-write* files: + +main.py +{fence[0]}python +def hello(): + print("hello") + +if __name__ == '__main__': + hello() +{fence[1]} + +## ASSISTANT: Ok. + +## USER: Refactor hello() into its own file. + +## ASSISTANT: + +Do make this change we need to modify `main.py` and make a new file `hello.py`: + +1. Make a new hello.py file with hello() in it. +2. Remove hello() from main.py and replace it with an import. + +Here are the *SEARCH/REPLACE* blocks: + +hello.py +{fence[0]}python +<<<<<<< SEARCH +======= +def hello(): + print("hello") +>>>>>>> REPLACE +{fence[1]} + +main.py +{fence[0]}python +<<<<<<< SEARCH +def hello(): + print("hello") +======= +from hello import hello +>>>>>>> REPLACE +{fence[1]} + # Rules Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character. From c63885fe8c3e17bb4d3ce5bf1fc2fea1dceb8f12 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Mon, 13 Nov 2023 15:28:01 -0800 Subject: [PATCH 03/16] More rules, instructions --- aider/coders/editblock_prompts.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index 553239202..52d9288f8 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -7,6 +7,7 @@ class EditBlockPrompts(CoderPrompts): main_system = """Act as an expert software developer. Always use best practices when coding. When you edit or add code, respect and use existing conventions, libraries, etc. +Always COMPLETELY IMPLEMENT the needed code. Never just leave comments about more work that will need to be done. Take requests for changes to the supplied code. If the request is ambiguous, ask questions. @@ -14,7 +15,7 @@ If the request is ambiguous, ask questions. Once you understand the request you MUST: 1. List the files you need to modify. *NEVER* suggest changes to a *read-only* file. Instead, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval. 2. Think step-by-step and explain the needed changes. -3. Describe each change with a *SEARCH/REPLACE block* per the example below. +3. Describe each change with a *SEARCH/REPLACE block* per the examples below. """ system_reminder = """# Example conversation 1: @@ -143,6 +144,16 @@ from hello import hello # Rules +Every *SEARCH/REPLACE block* must use this format: +1. The file path alone on a line, eg: main.py +2. The opening fence and code language, eg: {fence[0]}python +3. The start of search block: <<<<<<< SEARCH +4. A chunk of lines to search for in the existing source code +5. The dividing line: ======= +6. The lines to replace into the source code +7. The end of the replace block: >>>>>>> REPLACE +8. The closing fence: {fence[1]} + Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character. Every *SEARCH/REPLACE block* must be concise. Include just enough lines to uniquely specify the change. From 90f57664a65c91be1e53dbaeb066cc70e60eb5f8 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 10:04:00 -0800 Subject: [PATCH 04/16] aider: Refactor `cmd_tokens` to use `main_model.token_count()` instead of calling tokenizer directly. # Aider chat conversation: USER: in `cmd_tokens` make use of main_model.token_count() rather than calling tokenizer directly ASSISTANT: To make this change, we need to modify `aider/commands.py`. Specifically, we will update the `cmd_tokens` method to use `self.coder.main_model.token_count()` instead of calling `self.tokenizer.encode()` directly. Here is the *SEARCH/REPLACE* block for the change: aider/commands.py python <<<<<<< SEARCH tokens = len(self.tokenizer.encode(json.dumps(msgs))) ======= tokens = self.coder.main_model.token_count(json.dumps(msgs)) >>>>>>> REPLACE aider/commands.py python <<<<<<< SEARCH tokens = len(self.tokenizer.encode(msgs)) ======= tokens = self.coder.main_model.token_count(msgs) >>>>>>> REPLACE aider/commands.py python <<<<<<< SEARCH tokens = len(self.tokenizer.encode(repo_content)) ======= tokens = self.coder.main_model.token_count(repo_content) >>>>>>> REPLACE aider/commands.py python <<<<<<< SEARCH tokens = len(self.tokenizer.encode(content)) ======= tokens = self.coder.main_model.token_count(content) >>>>>>> REPLACE These changes will ensure that the `cmd_tokens` method uses the `token_count` method from the `main_model` to calculate the number of tokens, which is a more abstracted and potentially model-specific way of counting tokens. --- aider/commands.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aider/commands.py b/aider/commands.py index e99840dc9..c4e9f4b15 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -109,7 +109,7 @@ class Commands: dict(role="system", content=self.coder.gpt_prompts.main_system), dict(role="system", content=self.coder.gpt_prompts.system_reminder), ] - tokens = len(self.tokenizer.encode(json.dumps(msgs))) + tokens = self.coder.main_model.token_count(json.dumps(msgs)) res.append((tokens, "system messages", "")) # chat history @@ -117,7 +117,7 @@ class Commands: if msgs: msgs = [dict(role="dummy", content=msg) for msg in msgs] msgs = json.dumps(msgs) - tokens = len(self.tokenizer.encode(msgs)) + tokens = self.coder.main_model.token_count(msgs) res.append((tokens, "chat history", "use /clear to clear")) # repo map @@ -125,7 +125,7 @@ class Commands: if self.coder.repo_map: repo_content = self.coder.repo_map.get_repo_map(self.coder.abs_fnames, other_files) if repo_content: - tokens = len(self.tokenizer.encode(repo_content)) + tokens = self.coder.main_model.token_count(repo_content) res.append((tokens, "repository map", "use --map-tokens to resize")) # files @@ -134,7 +134,7 @@ class Commands: content = self.io.read_text(fname) # approximate content = f"{relative_fname}\n```\n" + content + "```\n" - tokens = len(self.tokenizer.encode(content)) + tokens = self.coder.main_model.token_count(content) res.append((tokens, f"{relative_fname}", "use /drop to drop from chat")) self.io.tool_output("Approximate context window usage, in tokens:") From c24a4a43927268eb7999660ed1b30c75f904a2d3 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 10:11:13 -0800 Subject: [PATCH 05/16] refactor into Model.token_count() --- aider/coders/base_coder.py | 17 ++++++++++------- aider/commands.py | 4 +--- aider/models/model.py | 13 +++++++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 57de58f45..1b419a110 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -420,15 +420,9 @@ class Coder: prompt = prompt.format(fence=self.fence) return prompt - def send_new_user_message(self, inp): + def format_messages(self): self.choose_fence() - - self.cur_messages += [ - dict(role="user", content=inp), - ] - main_sys = self.gpt_prompts.main_system - # if self.main_model.max_context_tokens > 4 * 1024: main_sys += "\n" + self.fmt_system_reminder() messages = [ @@ -440,6 +434,15 @@ class Coder: messages += self.get_files_messages() messages += self.cur_messages + return messages + + def send_new_user_message(self, inp): + self.cur_messages += [ + dict(role="user", content=inp), + ] + + messages = self.format_messages() + if self.verbose: utils.show_messages(messages, functions=self.functions) diff --git a/aider/commands.py b/aider/commands.py index c4e9f4b15..2027d64d6 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -1,4 +1,3 @@ -import json import re import subprocess import sys @@ -109,14 +108,13 @@ class Commands: dict(role="system", content=self.coder.gpt_prompts.main_system), dict(role="system", content=self.coder.gpt_prompts.system_reminder), ] - tokens = self.coder.main_model.token_count(json.dumps(msgs)) + tokens = self.coder.main_model.token_count(msgs) res.append((tokens, "system messages", "")) # chat history msgs = self.coder.done_messages + self.coder.cur_messages if msgs: msgs = [dict(role="dummy", content=msg) for msg in msgs] - msgs = json.dumps(msgs) tokens = self.coder.main_model.token_count(msgs) res.append((tokens, "chat history", "use /clear to clear")) diff --git a/aider/models/model.py b/aider/models/model.py index 904ec8af0..7522c97e0 100644 --- a/aider/models/model.py +++ b/aider/models/model.py @@ -1,3 +1,5 @@ +import json + import openai @@ -37,3 +39,14 @@ class Model: @staticmethod def commit_message_models(): return [Model.create("gpt-3.5-turbo"), Model.create("gpt-3.5-turbo-16k")] + + def token_count(self, messages): + if not self.tokenizer: + return + + if type(messages) is str: + msgs = messages + else: + msgs = json.dumps(messages) + + return len(self.tokenizer.encode(msgs)) From 3986da721051a436c280762588ce78871708cf0f Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 10:25:08 -0800 Subject: [PATCH 06/16] just use a single S/R format system prompt --- aider/coders/base_coder.py | 24 +++++++++++++++++++----- aider/coders/editblock_prompts.py | 4 +++- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 1b419a110..918be1056 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -316,10 +316,6 @@ class Coder: dict(role="user", content=all_content), dict(role="assistant", content="Ok."), ] - if self.abs_fnames: - files_messages += [ - dict(role="system", content=self.fmt_system_reminder()), - ] return files_messages @@ -423,7 +419,7 @@ class Coder: def format_messages(self): self.choose_fence() main_sys = self.gpt_prompts.main_system - main_sys += "\n" + self.fmt_system_reminder() + # main_sys += "\n" + self.fmt_system_reminder() messages = [ dict(role="system", content=main_sys), @@ -432,6 +428,24 @@ class Coder: self.summarize_end() messages += self.done_messages messages += self.get_files_messages() + + reminder_message = [ + dict(role="system", content=self.fmt_system_reminder()), + ] + + # messages_tokens = self.main_model.token_count(messages) + # reminder_tokens = self.main_model.token_count(reminder_message) + # cur_tokens = self.main_model.token_count(self.cur_messages) + + # total_tokens = messages_tokens + reminder_tokens + cur_tokens + + # Add the reminder prompt if there's a lot of context since the original + # system prompt, and we still have room to include it. + # THRESH = 2048 + # if messages_tokens > THRESH and total_tokens < self.main_model.max_context_tokens: + + messages += reminder_message + messages += self.cur_messages return messages diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index 52d9288f8..3866e9da7 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -18,7 +18,9 @@ Once you understand the request you MUST: 3. Describe each change with a *SEARCH/REPLACE block* per the examples below. """ - system_reminder = """# Example conversation 1: + system_reminder = """All changes to files must use this *SEARCH/REPLACE block* format. + +# Example conversation 1: ## USER: These are the *read-write* files: From df236e4dec12c6b6c753f046e320c375850be21b Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 10:30:02 -0800 Subject: [PATCH 07/16] stronger prompt to include comments, docstrings --- aider/coders/editblock_prompts.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index 3866e9da7..94cbc35fa 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -39,6 +39,8 @@ def divide(n, d): return str(result) def factorial(n): + "compute factorial" + if n == 0: return 1 else: @@ -81,6 +83,8 @@ mathweb/flask/app.py {fence[0]}python <<<<<<< SEARCH def factorial(n): + "compute factorial" + if n == 0: return 1 else: @@ -106,6 +110,8 @@ mathweb/flask/app.py main.py {fence[0]}python def hello(): + "print a greeting" + print("hello") if __name__ == '__main__': @@ -130,6 +136,8 @@ hello.py <<<<<<< SEARCH ======= def hello(): + "print a greeting" + print("hello") >>>>>>> REPLACE {fence[1]} @@ -138,6 +146,8 @@ main.py {fence[0]}python <<<<<<< SEARCH def hello(): + "print a greeting" + print("hello") ======= from hello import hello @@ -150,13 +160,13 @@ Every *SEARCH/REPLACE block* must use this format: 1. The file path alone on a line, eg: main.py 2. The opening fence and code language, eg: {fence[0]}python 3. The start of search block: <<<<<<< SEARCH -4. A chunk of lines to search for in the existing source code +4. A contiguous chunk of lines to search for in the existing source code 5. The dividing line: ======= 6. The lines to replace into the source code 7. The end of the replace block: >>>>>>> REPLACE 8. The closing fence: {fence[1]} -Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character. +Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc. Every *SEARCH/REPLACE block* must be concise. Include just enough lines to uniquely specify the change. Don't include extra unchanging lines. From 26a340e21f4e00c472dce74a3b6c8c9935628bc7 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 10:38:39 -0800 Subject: [PATCH 08/16] move the single format reminder into the top sys prompt --- aider/coders/base_coder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 918be1056..6ae7d3c64 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -419,7 +419,7 @@ class Coder: def format_messages(self): self.choose_fence() main_sys = self.gpt_prompts.main_system - # main_sys += "\n" + self.fmt_system_reminder() + main_sys += "\n" + self.fmt_system_reminder() messages = [ dict(role="system", content=main_sys), @@ -429,9 +429,9 @@ class Coder: messages += self.done_messages messages += self.get_files_messages() - reminder_message = [ - dict(role="system", content=self.fmt_system_reminder()), - ] + # reminder_message = [ + # dict(role="system", content=self.fmt_system_reminder()), + # ] # messages_tokens = self.main_model.token_count(messages) # reminder_tokens = self.main_model.token_count(reminder_message) @@ -444,7 +444,7 @@ class Coder: # THRESH = 2048 # if messages_tokens > THRESH and total_tokens < self.main_model.max_context_tokens: - messages += reminder_message + # messages += reminder_message messages += self.cur_messages From e0fea58276737e3f5788d820ecac9003bbcb64b2 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 10:46:16 -0800 Subject: [PATCH 09/16] Put back 2nd reminder --- aider/coders/base_coder.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 6ae7d3c64..84c269051 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -429,22 +429,19 @@ class Coder: messages += self.done_messages messages += self.get_files_messages() - # reminder_message = [ - # dict(role="system", content=self.fmt_system_reminder()), - # ] + reminder_message = [ + dict(role="system", content=self.fmt_system_reminder()), + ] - # messages_tokens = self.main_model.token_count(messages) - # reminder_tokens = self.main_model.token_count(reminder_message) - # cur_tokens = self.main_model.token_count(self.cur_messages) + messages_tokens = self.main_model.token_count(messages) + reminder_tokens = self.main_model.token_count(reminder_message) + cur_tokens = self.main_model.token_count(self.cur_messages) - # total_tokens = messages_tokens + reminder_tokens + cur_tokens + total_tokens = messages_tokens + reminder_tokens + cur_tokens - # Add the reminder prompt if there's a lot of context since the original - # system prompt, and we still have room to include it. - # THRESH = 2048 - # if messages_tokens > THRESH and total_tokens < self.main_model.max_context_tokens: - - # messages += reminder_message + # Add the reminder prompt if we still have room to include it. + if total_tokens < self.main_model.max_context_tokens: + messages += reminder_message messages += self.cur_messages From bbc174a8fbf209b20cad6bd59c20b433ea8e5d31 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 11:01:29 -0800 Subject: [PATCH 10/16] just use the Rules as the reminder --- aider/coders/editblock_prompts.py | 16 ++++++---------- aider/commands.py | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index 94cbc35fa..2e72a1387 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -16,9 +16,8 @@ Once you understand the request you MUST: 1. List the files you need to modify. *NEVER* suggest changes to a *read-only* file. Instead, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval. 2. Think step-by-step and explain the needed changes. 3. Describe each change with a *SEARCH/REPLACE block* per the examples below. -""" - system_reminder = """All changes to files must use this *SEARCH/REPLACE block* format. +All changes to files must use this *SEARCH/REPLACE block* format. # Example conversation 1: @@ -155,8 +154,9 @@ from hello import hello {fence[1]} # Rules +""" -Every *SEARCH/REPLACE block* must use this format: + system_reminder = """Every *SEARCH/REPLACE block* must use this format: 1. The file path alone on a line, eg: main.py 2. The opening fence and code language, eg: {fence[0]}python 3. The start of search block: <<<<<<< SEARCH @@ -169,24 +169,20 @@ Every *SEARCH/REPLACE block* must use this format: Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc. Every *SEARCH/REPLACE block* must be concise. Include just enough lines to uniquely specify the change. -Don't include extra unchanging lines. -Every *SEARCH/REPLACE block* must be fenced with {fence[0]} and {fence[1]}, with the correct code language. - -Every *SEARCH/REPLACE block* must start with the full path! NEVER try to *SEARCH/REPLACE* any *read-only* files. If you want to put code in a new file, use a *SEARCH/REPLACE block* with: - A new file path, including dir name if needed - An empty `SEARCH` section -- The new file's contents in the `updated` section +- The new file's contents in the `REPLACE` section """ files_content_prefix = "These are the *read-write* files:\n" files_no_full_files = "I am not sharing any *read-write* files yet." - repo_content_prefix = """Below here are summaries of other files! + repo_content_prefix = """Below here are summaries of other files present in this git repository. Do not propose changes to these files, they are *read-only*. -To make a file *read-write*, ask me to *add it to the chat*. +To make a file *read-write*, ask the user to *add it to the chat*. """ diff --git a/aider/commands.py b/aider/commands.py index 2027d64d6..c98b60cd5 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -103,10 +103,20 @@ class Commands: res = [] + self.coder.choose_fence() + # system messages msgs = [ - dict(role="system", content=self.coder.gpt_prompts.main_system), - dict(role="system", content=self.coder.gpt_prompts.system_reminder), + dict( + role="system", + content=self.coder.gpt_prompts.main_system + + "\n" + + self.coder.fmt_system_reminder(), + ), + dict( + role="system", + content=self.coder.fmt_system_reminder(), + ), ] tokens = self.coder.main_model.token_count(msgs) res.append((tokens, "system messages", "")) From 60ca8f8c0db684928dc525c7df7c297462d3a279 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 11:07:35 -0800 Subject: [PATCH 11/16] properly render the main sys prompt --- aider/coders/base_coder.py | 9 ++++----- aider/commands.py | 15 +++++---------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 84c269051..6842e2dee 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -411,15 +411,14 @@ class Coder: return self.send_new_user_message(inp) - def fmt_system_reminder(self): - prompt = self.gpt_prompts.system_reminder + def fmt_system_prompt(self, prompt): prompt = prompt.format(fence=self.fence) return prompt def format_messages(self): self.choose_fence() - main_sys = self.gpt_prompts.main_system - main_sys += "\n" + self.fmt_system_reminder() + main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system) + main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder) messages = [ dict(role="system", content=main_sys), @@ -430,7 +429,7 @@ class Coder: messages += self.get_files_messages() reminder_message = [ - dict(role="system", content=self.fmt_system_reminder()), + dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)), ] messages_tokens = self.main_model.token_count(messages) diff --git a/aider/commands.py b/aider/commands.py index c98b60cd5..ff4dc9de7 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -106,18 +106,13 @@ class Commands: self.coder.choose_fence() # system messages + main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system) + main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder) msgs = [ - dict( - role="system", - content=self.coder.gpt_prompts.main_system - + "\n" - + self.coder.fmt_system_reminder(), - ), - dict( - role="system", - content=self.coder.fmt_system_reminder(), - ), + dict(role="system", content=main_sys), + dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)), ] + tokens = self.coder.main_model.token_count(msgs) res.append((tokens, "system messages", "")) From 253354da98e8b9429bebf738e6d69457e09d3ddd Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 11:16:39 -0800 Subject: [PATCH 12/16] stronger elide prompt --- aider/coders/editblock_prompts.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index 2e72a1387..d8f830e2f 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -166,9 +166,10 @@ from hello import hello 7. The end of the replace block: >>>>>>> REPLACE 8. The closing fence: {fence[1]} -Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc. -Every *SEARCH/REPLACE block* must be concise. -Include just enough lines to uniquely specify the change. +Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including every line, comment, docstring, etc. +Every *SEARCH* section must be concise. Include just enough lines to uniquely specify the change. + +Include *ALL* the code being searched and replaced. Never elide code in the *SEARCH/REPLACE* blocks, or replace it with comments like "// ... rest of code ...". NEVER try to *SEARCH/REPLACE* any *read-only* files. From 9cbe114a74f9c5176f498bcf0e896bce3e380d71 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 11:30:14 -0800 Subject: [PATCH 13/16] positivity --- aider/coders/editblock_prompts.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py index d8f830e2f..8f1b7849f 100644 --- a/aider/coders/editblock_prompts.py +++ b/aider/coders/editblock_prompts.py @@ -7,14 +7,14 @@ class EditBlockPrompts(CoderPrompts): main_system = """Act as an expert software developer. Always use best practices when coding. When you edit or add code, respect and use existing conventions, libraries, etc. -Always COMPLETELY IMPLEMENT the needed code. Never just leave comments about more work that will need to be done. +Always COMPLETELY IMPLEMENT the needed code. Take requests for changes to the supplied code. If the request is ambiguous, ask questions. Once you understand the request you MUST: -1. List the files you need to modify. *NEVER* suggest changes to a *read-only* file. Instead, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval. -2. Think step-by-step and explain the needed changes. +1. List the files you need to modify. Only suggest changes to a *read-write* files. Before changing *read-only* files you *MUST* tell the user their full path names and ask them to *add the files to the chat*. End your reply and wait for their approval. +2. Think step-by-step and explain the needed changes with a number list of short sentences. 3. Describe each change with a *SEARCH/REPLACE block* per the examples below. All changes to files must use this *SEARCH/REPLACE block* format. @@ -166,12 +166,11 @@ from hello import hello 7. The end of the replace block: >>>>>>> REPLACE 8. The closing fence: {fence[1]} -Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including every line, comment, docstring, etc. -Every *SEARCH* section must be concise. Include just enough lines to uniquely specify the change. +Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc. -Include *ALL* the code being searched and replaced. Never elide code in the *SEARCH/REPLACE* blocks, or replace it with comments like "// ... rest of code ...". +Include *ALL* the code being searched and replaced! -NEVER try to *SEARCH/REPLACE* any *read-only* files. +Only *SEARCH/REPLACE* files that are *read-write*. If you want to put code in a new file, use a *SEARCH/REPLACE block* with: - A new file path, including dir name if needed From d2f924d6d1d7b6366e9d5bb80ced451efd9bd6f7 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 13:03:25 -0800 Subject: [PATCH 14/16] fixed /tokens --- aider/commands.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/aider/commands.py b/aider/commands.py index ff4dc9de7..8b05e4af5 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -106,11 +106,14 @@ class Commands: self.coder.choose_fence() # system messages - main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system) - main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder) + main_sys = self.coder.fmt_system_prompt(self.coder.gpt_prompts.main_system) + main_sys += "\n" + self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder) msgs = [ dict(role="system", content=main_sys), - dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)), + dict( + role="system", + content=self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder), + ), ] tokens = self.coder.main_model.token_count(msgs) From 6c77c32d06368fd549a1a7914d31a8d284a377f1 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 13:05:47 -0800 Subject: [PATCH 15/16] aider: Refactored `test_check_gitignore` to use a temporary git config file and set the `GIT_CONFIG_GLOBAL` environment variable during the test execution. # Aider chat conversation: USER: in `test_check_gitignore` can we have gitpython not use the global gitignore? ASSISTANT: To ensure that GitPython does not use the global gitignore, we can set the `GIT_CONFIG_GLOBAL` environment variable to point to a non-existent file or an empty temporary file during the test execution. This will effectively ignore the global gitignore settings. We will modify the `test_check_gitignore` method in the `tests/test_main.py` file to set the `GIT_CONFIG_GLOBAL` environment variable before the test runs and reset it after the test is completed. Here are the *SEARCH/REPLACE* blocks: tests/test_main.py ```python <<<<<<< SEARCH def test_check_gitignore(self): make_repo() io = InputOutput(pretty=False, yes=True) cwd = Path.cwd() gitignore = cwd / ".gitignore" self.assertFalse(gitignore.exists()) check_gitignore(cwd, io) self.assertTrue(gitignore.exists()) self.assertEqual(".aider*", gitignore.read_text().splitlines()[0]) gitignore.write_text("one\ntwo\n") check_gitignore(cwd, io) self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text()) ======= def test_check_gitignore(self): with tempfile.NamedTemporaryFile() as temp_gitconfig: os.environ['GIT_CONFIG_GLOBAL'] = temp_gitconfig.name make_repo() io = InputOutput(pretty=False, yes=True) cwd = Path.cwd() gitignore = cwd / ".gitignore" self.assertFalse(gitignore.exists()) check_gitignore(cwd, io) self.assertTrue(gitignore.exists()) self.assertEqual(".aider*", gitignore.read_text().splitlines()[0]) gitignore.write_text("one\ntwo\n") check_gitignore(cwd, io) self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text()) del os.environ['GIT_CONFIG_GLOBAL'] >>>>>>> REPLACE ``` This change will create a temporary file to act as a dummy global git configuration file and set the `GIT_CONFIG_GLOBAL` environment variable to its path. After the test, it will delete the environment variable to avoid affecting other tests or operations. --- tests/test_main.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 214679843..baffef430 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -113,20 +113,23 @@ class TestMain(TestCase): self.assertEqual(".aider*", gitignore.read_text().splitlines()[0]) def test_check_gitignore(self): - make_repo() - io = InputOutput(pretty=False, yes=True) - cwd = Path.cwd() - gitignore = cwd / ".gitignore" + with tempfile.NamedTemporaryFile() as temp_gitconfig: + os.environ['GIT_CONFIG_GLOBAL'] = temp_gitconfig.name + make_repo() + io = InputOutput(pretty=False, yes=True) + cwd = Path.cwd() + gitignore = cwd / ".gitignore" - self.assertFalse(gitignore.exists()) - check_gitignore(cwd, io) - self.assertTrue(gitignore.exists()) + self.assertFalse(gitignore.exists()) + check_gitignore(cwd, io) + self.assertTrue(gitignore.exists()) - self.assertEqual(".aider*", gitignore.read_text().splitlines()[0]) + self.assertEqual(".aider*", gitignore.read_text().splitlines()[0]) - gitignore.write_text("one\ntwo\n") - check_gitignore(cwd, io) - self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text()) + gitignore.write_text("one\ntwo\n") + check_gitignore(cwd, io) + self.assertEqual("one\ntwo\n.aider*\n", gitignore.read_text()) + del os.environ['GIT_CONFIG_GLOBAL'] def test_main_git_ignore(self): cwd = Path().cwd() From 4aba307ebd4fae3599a1cb6b86d167125d629c20 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 14 Nov 2023 13:09:50 -0800 Subject: [PATCH 16/16] defend against no tokenizer --- aider/coders/base_coder.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 6842e2dee..d418c85df 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -436,7 +436,11 @@ class Coder: reminder_tokens = self.main_model.token_count(reminder_message) cur_tokens = self.main_model.token_count(self.cur_messages) - total_tokens = messages_tokens + reminder_tokens + cur_tokens + if None not in (messages_tokens, reminder_tokens, cur_tokens): + total_tokens = messages_tokens + reminder_tokens + cur_tokens + else: + # add the reminder anyway + total_tokens = 0 # Add the reminder prompt if we still have room to include it. if total_tokens < self.main_model.max_context_tokens: