From ff3a75413b5985d05c9bde22dd4b9c3bf500702f Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 5 Sep 2024 13:30:25 -0700 Subject: [PATCH] sonnet+deep got 60.9/82.0 --- aider/coders/ask_prompts.py | 1 + aider/coders/wholefile_prompts.py | 18 +++------------- benchmark/benchmark.py | 35 +++++++++++++++++++++++++------ 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/aider/coders/ask_prompts.py b/aider/coders/ask_prompts.py index 98565af0b..a137a9403 100644 --- a/aider/coders/ask_prompts.py +++ b/aider/coders/ask_prompts.py @@ -6,6 +6,7 @@ from .base_prompts import CoderPrompts class AskPrompts(CoderPrompts): main_system = """Act as an expert code analyst. Answer questions about the supplied code. +Explain any needed code changes clearly but concisely. Always reply to the user in the same language they are using. """ diff --git a/aider/coders/wholefile_prompts.py b/aider/coders/wholefile_prompts.py index 66eac016a..e34513643 100644 --- a/aider/coders/wholefile_prompts.py +++ b/aider/coders/wholefile_prompts.py @@ -5,16 +5,8 @@ from .base_prompts import CoderPrompts class WholeFilePrompts(CoderPrompts): main_system = """Act as an expert software developer. -Take requests for changes to the supplied code. -If the request is ambiguous, ask questions. - -Always reply to the user in the same language they are using. - {lazy_prompt} -Once you understand the request you MUST: -1. Determine if any code changes are needed. -2. Explain any needed changes. -3. If changes are needed, output a copy of each file that needs changes. +Output a copy of each file that needs changes. """ example_messages = [ @@ -24,10 +16,7 @@ Once you understand the request you MUST: ), dict( role="assistant", - content="""Ok, I will: - -1. Switch the greeting text from "Hello" to "Hey". - + content=""" show_greeting.py {fence[0]} import sys @@ -42,7 +31,7 @@ if __name__ == '__main__': ), ] - system_reminder = """To suggest changes to a file you MUST return the entire content of the updated file. + system_reminder = """You MUST return the entire content of the updated file. You MUST use this *file listing* format: path/to/filename.js @@ -57,7 +46,6 @@ Every *file listing* MUST use this format: - ... entire content of the file ... - Final line: closing {fence[1]} -To suggest changes to a file you MUST return a *file listing* that contains the entire content of the file. *NEVER* skip, omit or elide content from a *file listing* using "..." or by adding comments like "... rest of code..."! Create a new file you MUST return a *file listing* which includes an appropriate filename, including any appropriate path. diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index e77853a69..3ff2fbf5e 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -549,18 +549,21 @@ def run_test_real( chat_history_file=history_fname, ) - main_model = models.Model(model_name) - edit_format = edit_format or main_model.edit_format + # ask_model = models.Model("openrouter/anthropic/claude-3.5-sonnet") + # ask_model = models.Model("openrouter/anthropic/claude-3.5-sonnet") + ask_model = models.Model("openrouter/deepseek/deepseek-chat") + whole_model = models.Model("openrouter/deepseek/deepseek-chat") + + main_model = ask_model + edit_format = "ask-whole" dump(main_model) dump(edit_format) show_fnames = ",".join(map(str, fnames)) print("fnames:", show_fnames) - coder = Coder.create( - main_model, - edit_format, - io, + coder_kwargs = dict( + io=io, fnames=fnames, use_git=False, stream=False, @@ -568,6 +571,12 @@ def run_test_real( # auto_lint=False, # disabled for code-in-json experiments cache_prompts=True, ) + coder = Coder.create( + main_model=ask_model, + edit_format="ask", + **coder_kwargs, + ) + coder.max_apply_update_errors = max_apply_update_errors timeouts = 0 @@ -592,7 +601,21 @@ def run_test_real( coder.apply_updates() else: + coder = Coder.create( + from_coder=coder, + main_model=ask_model, + edit_format="ask", + **coder_kwargs, + ) response = coder.run(with_message=instructions, preproc=False) + coder = Coder.create( + from_coder=coder, + main_model=whole_model, + edit_format="whole", + **coder_kwargs, + ) + response = coder.run(with_message="make those changes", preproc=False) + dur += time.time() - start if not no_aider: