works

2025-05-28 16:25:00 +00:00 · 2023-08-03 16:22:49 -03:00 · 2023-08-03 16:22:49 -03:00 · 26ebc715eb
commit 26ebc715eb
parent a0f03ab0ce
2 changed files with 75 additions and 40 deletions
--- a/aider/coders/editblock_coder.py
+++ b/aider/coders/editblock_coder.py
@ -52,13 +52,7 @@ def prep(content):
    return content, lines


-def replace_most_similar_chunk(whole, part, replace):
-    """Best efforts to find the `part` lines in `whole` and replace them with `replace`"""
-
-    whole, whole_lines = prep(whole)
-    part, part_lines = prep(part)
-    replace, replace_lines = prep(replace)
-
+def perfect_or_whitespace(whole_lines, part_lines, replace_lines):
    # Try for a perfect match
    if part_lines in whole_lines:
        updated_lines = whole_lines.replace(part_lines, replace_lines)
@ -69,6 +63,26 @@ def replace_most_similar_chunk(whole, part, replace):
    if res:
        return res

+
+def replace_most_similar_chunk(whole, part, replace):
+    """Best efforts to find the `part` lines in `whole` and replace them with `replace`"""
+
+    whole, whole_lines = prep(whole)
+    part, part_lines = prep(part)
+    replace, replace_lines = prep(replace)
+
+    res = perfect_or_whitespace(whole_lines, part_lines, replace_lines)
+    if res:
+        return res
+
+    # drop leading empty line, GPT sometimes adds them spuriously (issue #25)
+    if len(part_lines) > 2 and not part_lines[0].strip():
+        skip_blank_line_part_lines = part_lines[1:]
+        res = perfect_or_whitespace(whole_lines, skip_blank_line_part_lines, replace_lines)
+        if res:
+            dump(repr(res))
+            return res
+
    # Try to handle when it elides code with ...
    try:
        res = try_dotdotdots(whole, part, replace)
@ -78,7 +92,9 @@ def replace_most_similar_chunk(whole, part, replace):
        pass

    # Try fuzzy matching
-    return replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
+    res = replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
+    if res:
+        return res


 def try_dotdotdots(whole, part, replace):
@ -135,53 +151,72 @@ def try_dotdotdots(whole, part, replace):


 def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines):
-    dump(repr(part_lines), repr(replace_lines))
+    dump(whole_lines)
+    dump(repr(whole_lines))
+    dump(repr(part_lines))
+    dump(repr(replace_lines))

    # GPT often messes up leading whitespace.
    # It usually does it uniformly across the ORIG and UPD blocks.
    # Either omitting all leading whitespace, or including only some of it.

+    # Outdent everything in part_lines and replace_lines by the max fixed amount possible
    leading = [len(p) - len(p.lstrip()) for p in part_lines if p.strip()] + [
        len(p) - len(p.lstrip()) for p in replace_lines if p.strip()
    ]

-    # Outdent everything in part_lines and replace_lines by the max fixed amount possible
    if leading and min(leading):
-        leading = min(leading)
-        part_lines = [p[leading:] if p.strip() else p for p in part_lines]
-        replace_lines = [p[leading:] if p.strip() else p for p in replace_lines]
-
-    # TODO: this logic needs to be fixed
-    # if the max outdent still leaves space
-    if all((not pline or pline[0].isspace()) for pline in part_lines):
-        return
+        num_leading = min(leading)
+        part_lines = [p[num_leading:] if p.strip() else p for p in part_lines]
+        replace_lines = [p[num_leading:] if p.strip() else p for p in replace_lines]

    # can we find an exact match not including the leading whitespace
-    for i in range(len(whole_lines) - len(part_lines) + 1):
-        leading_whitespace = ""
-        for j, c in enumerate(whole_lines[i]):
-            if c == part_lines[0][0]:
-                leading_whitespace = whole_lines[i][:j]
-                break
+    num_part_lines = len(part_lines)

-        if not leading_whitespace or not all(c.isspace() for c in leading_whitespace):
-            continue
+    dump(part_lines)
+    dump(replace_lines)

-        matched = all(
-            whole_lines[i + k].startswith(leading_whitespace + part_lines[k])
-            for k in range(len(part_lines))
+    for i in range(len(whole_lines) - num_part_lines + 1):
+        add_leading = match_but_for_leading_whitespace(
+            whole_lines[i : i + num_part_lines], part_lines
        )

-        if matched:
-            replace_lines = [
-                leading_whitespace + rline if rline else rline for rline in replace_lines
-            ]
-            whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + len(part_lines) :]
-            return "".join(whole_lines)
+        if add_leading is None:
+            continue
+
+        dump(len(add_leading))
+
+        replace_lines = [add_leading + rline if rline.strip() else rline for rline in replace_lines]
+        whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + num_part_lines :]
+        dump(repr(whole_lines))
+        return "".join(whole_lines)

    return None


+def match_but_for_leading_whitespace(whole_lines, part_lines):
+    dump(whole_lines, part_lines)
+
+    num = len(whole_lines)
+
+    # does the non-whitespace all agree?
+    if not all(whole_lines[i].lstrip() == part_lines[i].lstrip() for i in range(num)):
+        return
+
+    # are they all offset the same?
+    add = set(
+        whole_lines[i][: len(whole_lines[i]) - len(part_lines[i])]
+        for i in range(num)
+        if whole_lines[i].strip()
+    )
+
+    dump(add)
+    if len(add) != 1:
+        return
+
+    return add.pop()
+
+
 def replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines):
    similarity_thresh = 0.8

--- a/tests/test_editblock.py
+++ b/tests/test_editblock.py
@ -246,14 +246,14 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output

    def test_replace_part_with_just_some_missing_leading_whitespace(self):
        whole = "    line1\n    line2\n    line3\n"
-        part = " line1\n     line2\n"
-        replace = "new_line1\nnew_line2\n"
+        part = " line1\n line2\n"
+        replace = " new_line1\n     new_line2\n"
        expected_output = "    new_line1\n        new_line2\n    line3\n"

        result = eb.replace_most_similar_chunk(whole, part, replace)
        self.assertEqual(result, expected_output)

-    def test_replace_part_with_missing_leading_whitespace_including_blank_lines(self):
+    def test_replace_part_with_missing_leading_whitespace_including_blank_line(self):
        """
        The part has leading whitespace on all lines, so should be ignored.
        But it has a *blank* line with no whitespace at all, which was causing a
@ -261,8 +261,8 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output
        """
        whole = "    line1\n    line2\n    line3\n"
        part = "\n  line1\n  line2\n"
-        replace = "new_line1\nnew_line2\n"
-        expected_output = None
+        replace = "  new_line1\n  new_line2\n"
+        expected_output = "    new_line1\n    new_line2\n    line3\n"

        result = eb.replace_most_similar_chunk(whole, part, replace)
        self.assertEqual(result, expected_output)