laziness24-turbo-udiff-never2x

2025-05-25 23:05:00 +00:00 · 2023-12-18 18:43:15 -08:00 · 2023-12-18 18:43:15 -08:00 · 308007a8e9
commit 308007a8e9
parent fd4e890217
3 changed files with 17 additions and 6 deletions
--- a/aider/coders/udiff_prompts.py
+++ b/aider/coders/udiff_prompts.py
@@ -5,7 +5,9 @@ from .base_prompts import CoderPrompts

 class UnifiedDiffPrompts(CoderPrompts):
    main_system = """Act as an expert software developer.
-You are diligent and tireless, and you always COMPLETELY IMPLEMENT the needed code.
+You are diligent and tireless!
+You NEVER leave comments describing code without implementing it!
+You always COMPLETELY IMPLEMENT the needed code!
 Always use best practices when coding.
 Respect and use existing conventions, libraries, etc that are already present in the code base.

@@ -95,6 +97,10 @@ Delete the entire existing version with `-` lines and then add a new, updated ve
 This will help you generate correct code and correct diffs.

 To make a new file, show a diff from `--- /dev/null` to `+++ path/to/new/file.ext`.
+
+You are diligent and tireless!
+You NEVER leave comments describing code without implementing it!
+You always COMPLETELY IMPLEMENT the needed code!
 """

    files_content_prefix = "These are the *read-write* files:\n"
--- a/benchmark/refactor_tools.py
+++ b/benchmark/refactor_tools.py
@@ -132,7 +132,10 @@ def find_non_self_methods(path):
    non_self_methods = []
    for filename in python_files:
        with open(filename, "r") as file:
+            try:
                node = ast.parse(file.read(), filename=filename)
+            except:
+                pass
            checker = SelfUsageChecker()
            checker.visit(node)
            for method in checker.non_self_methods:
@@ -145,7 +148,7 @@ def process(entry):
    fname, class_name, method_name, class_children, method_children = entry
    if method_children > class_children / 2:
        return
-    if method_children < 100:
+    if method_children < 250:
        return

    fname = Path(fname)
@@ -154,7 +157,7 @@ def process(entry):

    print(f"{fname} {class_name} {method_name} {class_children} {method_children}")

-    dname = Path("tmp.benchmarks/refactor-benchmark")
+    dname = Path("tmp.benchmarks/refactor-benchmark-pylint")
    dname.mkdir(exist_ok=True)

    dname = dname / f"{fname.stem}_{class_name}_{method_name}"
--- a/docs/unified-diffs.md
+++ b/docs/unified-diffs.md
@@ -79,8 +79,8 @@ code edits, because it's the
 default output format of `git diff`:

 ```diff
---- a/hello.py
-+++ b/hello.py
+--- a/greeting.py
++++ b/greeting.py
@@ -1,5 +1,5 @@
 def main(args):
     # show a greeting
@@ -246,6 +246,7 @@ They exhibit a variety of problems:

 - GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change.
 - GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* as if they were already there.
+- GPT outdents all of the code, removing all the leading white space which is shared across the lines. So a chunk of deeply indented code is shown in a diff with only the leading white space that changes between the lines in the chunk.
 - GPT jumps ahead to show edits to a different part of the file without starting a new hunk with a `@@ ... @@` divider.

 As an example of the first issue, consider this source code:
@@ -285,6 +286,7 @@ If a hunk doesn't apply cleanly, aider uses a number of strategies:

 - Normalize the hunk, by taking the *minus* `-` and *space* lines as one version of the hunk and the *space* and *plus* `+` lines as a second version and doing an actual unified diff on them.
 - Try and discover new lines that GPT is trying to add but which it forgot to mark with *plus* `+` markers. This is done by diffing the *minus* `-` and *space* lines back against the original file.
+- Try and apply the hunk using "relative leading white space", so we can match and patch correctly even if the hunk has been uniformly indented or outdented.
 - Break a large hunk apart into an overlapping sequence of smaller hunks, which each contain only one contiguous run of *plus* `+` and *minus* `-` lines. Try and apply each of these sub-hunks independently.
 - Vary the size and offset of the "context window" of *space*  lines from the hunk that are used to localize the edit to a specific part of the file.
 - Combine the above mechanisms to progressively become more permissive about how to apply the hunk.