diff --git a/aider/coders/udiff_prompts.py b/aider/coders/udiff_prompts.py index 4ab30bfc4..8608152ad 100644 --- a/aider/coders/udiff_prompts.py +++ b/aider/coders/udiff_prompts.py @@ -5,7 +5,9 @@ from .base_prompts import CoderPrompts class UnifiedDiffPrompts(CoderPrompts): main_system = """Act as an expert software developer. -You are diligent and tireless, and you always COMPLETELY IMPLEMENT the needed code. +You are diligent and tireless! +You NEVER leave comments describing code without implementing it! +You always COMPLETELY IMPLEMENT the needed code! Always use best practices when coding. Respect and use existing conventions, libraries, etc that are already present in the code base. @@ -95,6 +97,10 @@ Delete the entire existing version with `-` lines and then add a new, updated ve This will help you generate correct code and correct diffs. To make a new file, show a diff from `--- /dev/null` to `+++ path/to/new/file.ext`. + +You are diligent and tireless! +You NEVER leave comments describing code without implementing it! +You always COMPLETELY IMPLEMENT the needed code! 
""" files_content_prefix = "These are the *read-write* files:\n" diff --git a/benchmark/refactor_tools.py b/benchmark/refactor_tools.py index a54663377..a29aa6e9a 100755 --- a/benchmark/refactor_tools.py +++ b/benchmark/refactor_tools.py @@ -132,7 +132,10 @@ def find_non_self_methods(path): non_self_methods = [] for filename in python_files: with open(filename, "r") as file: - node = ast.parse(file.read(), filename=filename) + try: + node = ast.parse(file.read(), filename=filename) + except (SyntaxError, ValueError):  # skip files that cannot be parsed + continue checker = SelfUsageChecker() checker.visit(node) for method in checker.non_self_methods: @@ -145,7 +148,7 @@ def process(entry): fname, class_name, method_name, class_children, method_children = entry if method_children > class_children / 2: return - if method_children < 100: + if method_children < 250: return fname = Path(fname) @@ -154,7 +157,7 @@ def process(entry): print(f"{fname} {class_name} {method_name} {class_children} {method_children}") - dname = Path("tmp.benchmarks/refactor-benchmark") + dname = Path("tmp.benchmarks/refactor-benchmark-pylint") dname.mkdir(exist_ok=True) dname = dname / f"{fname.stem}_{class_name}_{method_name}" diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index fe28fd8f5..9cc328155 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -79,8 +79,8 @@ code edits, because it's the default output format of `git diff`: ```diff ---- a/hello.py -+++ b/hello.py +--- a/greeting.py ++++ b/greeting.py @@ -1,5 +1,5 @@ def main(args): # show a greeting @@ -246,6 +246,7 @@ They exhibit a variety of problems: - GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change. - GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* as if they were already there. +- GPT outdents all of the code, removing all the leading white space which is shared across the lines. 
So a chunk of deeply indented code is shown in a diff with only the leading white space that changes between the lines in the chunk. - GPT jumps ahead to show edits to a different part of the file without starting a new hunk with a `@@ ... @@` divider. As an example of the first issue, consider this source code: @@ -285,6 +286,7 @@ If a hunk doesn't apply cleanly, aider uses a number of strategies: - Normalize the hunk, by taking the *minus* `-` and *space* lines as one version of the hunk and the *space* and *plus* `+` lines as a second version and doing an actual unified diff on them. - Try and discover new lines that GPT is trying to add but which it forgot to mark with *plus* `+` markers. This is done by diffing the *minus* `-` and *space* lines back against the original file. +- Try and apply the hunk using "relative leading white space", so we can match and patch correctly even if the hunk has been uniformly indented or outdented. - Break a large hunk apart into an overlapping sequence of smaller hunks, which each contain only one contiguous run of *plus* `+` and *minus* `-` lines. Try and apply each of these sub-hunks independently. - Vary the size and offset of the "context window" of *space* lines from the hunk that are used to localize the edit to a specific part of the file. - Combine the above mechanisms to progressively become more permissive about how to apply the hunk.