mirror of https://github.com/Aider-AI/aider.git
synced 2025-05-20 12:24:59 +00:00

commit 0d4b6b6885 (parent 360846320f): added missing files

5 changed files with 1918 additions and 0 deletions

aider/coders/search_replace.py (new executable file, +770 lines)

@@ -0,0 +1,770 @@
#!/usr/bin/env python

import sys
from pathlib import Path

import git
from diff_match_patch import diff_match_patch
from tqdm import tqdm

from aider.dump import dump
from aider.utils import GitTemporaryDirectory


class RelativeIndenter:
    """Rewrites text files to have relative indentation, which involves
    reformatting the leading white space on lines. This format makes
    it easier to search and apply edits to pairs of code blocks which
    may differ significantly in their overall level of indentation.

    It removes leading white space which is shared with the preceding
    line.

    Original:
    ```
            Foo # indented 8
                Bar # indented 4 more than the previous line
                Baz # same indent as the previous line
                Fob # same indent as the previous line
    ```

    Becomes:
    ```
            Foo # indented 8
        Bar # indented 4 more than the previous line
    Baz # same indent as the previous line
    Fob # same indent as the previous line
    ```

    If the current line is *less* indented than the previous line,
    uses a unicode character to indicate outdenting.

    Original
    ```
            Foo
                Bar
                Baz
            Fob # indented 4 less than the previous line
    ```

    Becomes:
    ```
            Foo
        Bar
    Baz
    ←←←←Fob # indented 4 less than the previous line
    ```

    This is a similar original to the last one, but every line has
    been uniformly outdented:
    ```
    Foo
        Bar
        Baz
    Fob # indented 4 less than the previous line
    ```

    It becomes this result, which is very similar to the previous
    result. Only the white space on the first line differs. From the
    word Foo onwards, it is identical to the previous result.
    ```
    Foo
        Bar
    Baz
    ←←←←Fob # indented 4 less than the previous line
    ```

    """

    def __init__(self, texts):
        """
        Based on the texts, choose a unicode character that isn't in any of them.
        """

        chars = set()
        for text in texts:
            chars.update(text)

        ARROW = "←"
        if ARROW not in chars:
            self.marker = ARROW
        else:
            self.marker = self.select_unique_marker(chars)

    def select_unique_marker(self, chars):
        for codepoint in range(0x10FFFF, 0x10000, -1):
            marker = chr(codepoint)
            if marker not in chars:
                return marker

        raise ValueError("Could not find a unique marker")

    def make_relative(self, text):
        """
        Transform text to use relative indents.
        """

        if self.marker in text:
            raise ValueError(f"Text already contains the outdent marker: {self.marker}")

        lines = text.splitlines(keepends=True)

        output = []
        prev_indent = ""
        for line in lines:
            line_without_end = line.rstrip("\n\r")

            len_indent = len(line_without_end) - len(line_without_end.lstrip())
            indent = line[:len_indent]
            change = len_indent - len(prev_indent)
            if change > 0:
                cur_indent = indent[-change:]
            elif change < 0:
                cur_indent = self.marker * -change
            else:
                cur_indent = ""

            out_line = cur_indent + "\n" + line[len_indent:]
            # dump(len_indent, change, out_line)
            # print(out_line)
            output.append(out_line)
            prev_indent = indent

        res = "".join(output)
        return res

    def make_absolute(self, text):
        """
        Transform text from relative back to absolute indents.
        """
        lines = text.splitlines(keepends=True)

        output = []
        prev_indent = ""
        for i in range(0, len(lines), 2):
            dent = lines[i].rstrip("\r\n")
            non_indent = lines[i + 1]

            if dent.startswith(self.marker):
                len_outdent = len(dent)
                cur_indent = prev_indent[:-len_outdent]
            else:
                cur_indent = prev_indent + dent

            if not non_indent.rstrip("\r\n"):
                out_line = non_indent  # don't indent a blank line
            else:
                out_line = cur_indent + non_indent

            output.append(out_line)
            prev_indent = cur_indent

        res = "".join(output)
        if self.marker in res:
            # dump(res)
            raise ValueError("Error transforming text back to absolute indents")

        return res
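
# Round-trip usage sketch: transforming to relative indents and back should
# be lossless, assuming the text doesn't already contain the outdent marker:
#
#   ri = RelativeIndenter([text])
#   assert ri.make_absolute(ri.make_relative(text)) == text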


# The patches are created to change S->R.
# So all the patch offsets are relative to S.
# But O has a lot more content. So all the offsets are very wrong.
#
# But patch_apply() seems to imply that once patch N is located,
# then it adjusts the offset of the next patch.
#
# This is great, because once we sync up after a big gap the nearby
# patches are close to being located right.
# Except when indentation has been changed by GPT.
#
# It would help to use the diff trick to build map_S_offset_to_O_offset().
# Then update all the S offsets in the S->R patches to be O offsets.
# Do we also need to update the R offsets?
#
# What if this gets funky/wrong?
#


def map_patches(texts, patches, debug):
    search_text, replace_text, original_text = texts

    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5

    diff_s_o = dmp.diff_main(search_text, original_text)
    # diff_r_s = dmp.diff_main(replace_text, search_text)

    # dmp.diff_cleanupSemantic(diff_s_o)
    # dmp.diff_cleanupEfficiency(diff_s_o)

    if debug:
        html = dmp.diff_prettyHtml(diff_s_o)
        Path("tmp.html").write_text(html)

        dump(len(search_text))
        dump(len(original_text))

    for patch in patches:
        start1 = patch.start1
        start2 = patch.start2

        patch.start1 = dmp.diff_xIndex(diff_s_o, start1)
        patch.start2 = dmp.diff_xIndex(diff_s_o, start2)

        if debug:
            print()
            print(start1, repr(search_text[start1 : start1 + 50]))
            print(patch.start1, repr(original_text[patch.start1 : patch.start1 + 50]))
            print(patch.diffs)
            print()

    return patches
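
# Note: dmp.diff_xIndex(diffs, loc) translates an offset in text1 into the
# equivalent offset in text2 for a given diff. That is what lets the loop
# above remap S-relative patch offsets into O-relative offsets.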


example = """Left
Left
    4 in
    4 in
        8 in
    4 in
Left
"""

"""
ri = RelativeIndenter([example])
dump(example)

rel_example = ri.make_relative(example)
dump(repr(rel_example))

abs_example = ri.make_absolute(rel_example)
dump(abs_example)


sys.exit()
"""


def relative_indent(texts):
    ri = RelativeIndenter(texts)
    texts = list(map(ri.make_relative, texts))

    return ri, texts


line_padding = 100


def line_pad(text):
    padding = "\n" * line_padding
    return padding + text + padding


def line_unpad(text):
    if set(text[:line_padding] + text[-line_padding:]) != set("\n"):
        return
    return text[line_padding:-line_padding]


def dmp_apply(texts, remap=True):
    debug = False
    # debug = True

    search_text, replace_text, original_text = texts

    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5
    # dmp.Diff_EditCost = 16

    if remap:
        dmp.Match_Threshold = 0.95
        dmp.Match_Distance = 500
        dmp.Match_MaxBits = 128
        dmp.Patch_Margin = 32
    else:
        dmp.Match_Threshold = 0.5
        dmp.Match_Distance = 100_000
        dmp.Match_MaxBits = 32
        dmp.Patch_Margin = 8

    diff = dmp.diff_main(search_text, replace_text, None)
    dmp.diff_cleanupSemantic(diff)
    dmp.diff_cleanupEfficiency(diff)

    patches = dmp.patch_make(search_text, diff)

    if debug:
        html = dmp.diff_prettyHtml(diff)
        Path("tmp.search_replace_diff.html").write_text(html)

        for d in diff:
            print(d[0], repr(d[1]))

        for patch in patches:
            start1 = patch.start1
            print()
            print(start1, repr(search_text[start1 : start1 + 10]))
            print(start1, repr(replace_text[start1 : start1 + 10]))
            print(patch.diffs)

    # dump(original_text)
    # dump(search_text)

    if remap:
        patches = map_patches(texts, patches, debug)

    patches_text = dmp.patch_toText(patches)

    new_text, success = dmp.patch_apply(patches, original_text)

    all_success = False not in success

    if debug:
        # dump(new_text)
        print(patches_text)

        # print(new_text)
        dump(success)
        dump(all_success)

        # print(new_text)

    if not all_success:
        return

    return new_text


def lines_to_chars(lines, mapping):
    new_text = []
    for char in lines:
        new_text.append(mapping[ord(char)])

    new_text = "".join(new_text)
    return new_text


def dmp_lines_apply(texts, remap=True):
    debug = False
    # debug = True

    for t in texts:
        assert t.endswith("\n"), t

    search_text, replace_text, original_text = texts

    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5
    # dmp.Diff_EditCost = 16

    dmp.Match_Threshold = 0.1
    dmp.Match_Distance = 100_000
    dmp.Match_MaxBits = 32
    dmp.Patch_Margin = 1

    all_text = search_text + replace_text + original_text
    all_lines, _, mapping = dmp.diff_linesToChars(all_text, "")
    assert len(all_lines) == len(all_text.splitlines())

    search_num = len(search_text.splitlines())
    replace_num = len(replace_text.splitlines())
    original_num = len(original_text.splitlines())

    search_lines = all_lines[:search_num]
    replace_lines = all_lines[search_num : search_num + replace_num]
    original_lines = all_lines[search_num + replace_num :]

    assert len(search_lines) == search_num
    assert len(replace_lines) == replace_num
    assert len(original_lines) == original_num

    diff_lines = dmp.diff_main(search_lines, replace_lines, None)
    dmp.diff_cleanupSemantic(diff_lines)
    dmp.diff_cleanupEfficiency(diff_lines)

    patches = dmp.patch_make(search_lines, diff_lines)

    if debug:
        diff = list(diff_lines)
        dmp.diff_charsToLines(diff, mapping)
        dump(diff)
        html = dmp.diff_prettyHtml(diff)
        Path("tmp.search_replace_diff.html").write_text(html)

        for d in diff:
            print(d[0], repr(d[1]))

    new_lines, success = dmp.patch_apply(patches, original_lines)
    new_text = lines_to_chars(new_lines, mapping)

    all_success = False not in success

    if debug:
        # print(new_text)
        dump(success)
        dump(all_success)

        # print(new_text)

    if not all_success:
        return

    return new_text


def diff_lines(search_text, replace_text):
    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5
    # dmp.Diff_EditCost = 16
    search_lines, replace_lines, mapping = dmp.diff_linesToChars(search_text, replace_text)

    diff_lines = dmp.diff_main(search_lines, replace_lines, None)
    dmp.diff_cleanupSemantic(diff_lines)
    dmp.diff_cleanupEfficiency(diff_lines)

    diff = list(diff_lines)
    dmp.diff_charsToLines(diff, mapping)
    dump(diff)

    udiff = []
    for d, lines in diff:
        if d < 0:
            d = "-"
        elif d > 0:
            d = "+"
        else:
            d = " "
        for line in lines.splitlines(keepends=True):
            udiff.append(d + line)

    return udiff


def search_and_replace(texts):
    search_text, replace_text, original_text = texts

    num = original_text.count(search_text)
    # if num > 1:
    #    raise SearchTextNotUnique()
    if num == 0:
        return

    new_text = original_text.replace(search_text, replace_text)

    return new_text


def git_cherry_pick_osr_onto_o(texts):
    search_text, replace_text, original_text = texts

    with GitTemporaryDirectory() as dname:
        repo = git.Repo(dname)

        fname = Path(dname) / "file.txt"

        # Make O->S->R
        fname.write_text(original_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "original")
        original_hash = repo.head.commit.hexsha

        fname.write_text(search_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "search")

        fname.write_text(replace_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "replace")
        replace_hash = repo.head.commit.hexsha

        # go back to O
        repo.git.checkout(original_hash)

        # cherry pick R onto original
        try:
            repo.git.cherry_pick(replace_hash, "--minimal")
        except git.exc.GitCommandError:
            # merge conflicts!
            return

        new_text = fname.read_text()
        return new_text


def git_cherry_pick_sr_onto_so(texts):
    search_text, replace_text, original_text = texts

    with GitTemporaryDirectory() as dname:
        repo = git.Repo(dname)

        fname = Path(dname) / "file.txt"

        fname.write_text(search_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "search")
        search_hash = repo.head.commit.hexsha

        # make search->replace
        fname.write_text(replace_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "replace")
        replace_hash = repo.head.commit.hexsha

        # go back to search,
        repo.git.checkout(search_hash)

        # make search->original
        fname.write_text(original_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "original")

        # cherry pick replace onto original
        try:
            repo.git.cherry_pick(replace_hash, "--minimal")
        except git.exc.GitCommandError:
            # merge conflicts!
            return

        new_text = fname.read_text()

        return new_text


class SearchTextNotUnique(ValueError):
    pass


all_preprocs = [
    # (strip_blank_lines, relative_indent, reverse_lines)
    (False, False, False),
    (True, False, False),
    (False, True, False),
    (True, True, False),
    # (False, False, True),
    # (True, False, True),
    # (False, True, True),
    # (True, True, True),
]

always_relative_indent = [
    (False, True, False),
    (True, True, False),
    # (False, True, True),
    # (True, True, True),
]

editblock_strategies = [
    (search_and_replace, all_preprocs),
    (git_cherry_pick_osr_onto_o, all_preprocs),
    (dmp_lines_apply, all_preprocs),
]

never_relative = [
    (False, False),
    (True, False),
]

udiff_strategies = [
    (search_and_replace, all_preprocs),
    (git_cherry_pick_osr_onto_o, all_preprocs),
    (dmp_lines_apply, all_preprocs),
]


def flexible_search_and_replace(texts, strategies):
    """Try a series of search/replace methods, starting from the most
    literal interpretation of search_text. If needed, progress to more
    flexible methods, which can accommodate divergence between
    search_text and original_text and yet still achieve the desired
    edits.
    """

    for strategy, preprocs in strategies:
        for preproc in preprocs:
            res = try_strategy(texts, strategy, preproc)
            if res:
                dump(strategy, preproc)
                return res
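
# Usage sketch: given the (search, replace, original) texts from an edit
# block, the cascade above tries each strategy/preproc combination in turn,
# falling through to None if none of them succeeds:
#
#   texts = (search_text, replace_text, original_text)
#   new_text = flexible_search_and_replace(texts, editblock_strategies)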


def reverse_lines(text):
    lines = text.splitlines(keepends=True)
    lines.reverse()
    return "".join(lines)


def try_strategy(texts, strategy, preproc):
    preproc_strip_blank_lines, preproc_relative_indent, preproc_reverse = preproc
    ri = None

    if preproc_strip_blank_lines:
        texts = strip_blank_lines(texts)
    if preproc_relative_indent:
        ri, texts = relative_indent(texts)
    if preproc_reverse:
        texts = list(map(reverse_lines, texts))

    res = strategy(texts)

    if res and preproc_reverse:
        res = reverse_lines(res)

    if res and preproc_relative_indent:
        try:
            res = ri.make_absolute(res)
        except ValueError:
            return

    return res


def strip_blank_lines(texts):
    # strip leading and trailing blank lines
    texts = [text.strip("\n") + "\n" for text in texts]
    return texts


def read_text(fname):
    text = Path(fname).read_text()
    return text


def proc(dname):
    dname = Path(dname)

    try:
        search_text = read_text(dname / "search")
        replace_text = read_text(dname / "replace")
        original_text = read_text(dname / "original")
    except FileNotFoundError:
        return

    ####

    texts = search_text, replace_text, original_text

    strategies = [
        # (search_and_replace, all_preprocs),
        # (git_cherry_pick_osr_onto_o, all_preprocs),
        # (git_cherry_pick_sr_onto_so, all_preprocs),
        # (dmp_apply, all_preprocs),
        (dmp_lines_apply, all_preprocs),
    ]

    _strategies = editblock_strategies  # noqa: F841

    short_names = dict(
        search_and_replace="sr",
        git_cherry_pick_osr_onto_o="cp_o",
        git_cherry_pick_sr_onto_so="cp_so",
        dmp_apply="dmp",
        dmp_lines_apply="dmpl",
    )

    patched = dict()
    for strategy, preprocs in strategies:
        for preproc in preprocs:
            method = strategy.__name__
            method = short_names[method]

            strip_blank, rel_indent, rev_lines = preproc
            if strip_blank or rel_indent:
                method += "_"
            if strip_blank:
                method += "s"
            if rel_indent:
                method += "i"
            if rev_lines:
                method += "r"

            res = try_strategy(texts, strategy, preproc)
            patched[method] = res

    results = []
    for method, res in patched.items():
        out_fname = dname / f"original.{method}"
        if out_fname.exists():
            out_fname.unlink()

        if res:
            out_fname.write_text(res)

            correct = (dname / "correct").read_text()
            if res == correct:
                res = "pass"
            else:
                res = "WRONG"
        else:
            res = "fail"

        results.append((method, res))

    return results


def colorize_result(result):
    colors = {
        "pass": "\033[102;30mpass\033[0m",  # Green background, black text
        "WRONG": "\033[101;30mWRONG\033[0m",  # Red background, black text
        "fail": "\033[103;30mfail\033[0m",  # Yellow background, black text
    }
    return colors.get(result, result)  # Default to original result if not found


def main(dnames):
    all_results = []
    for dname in tqdm(dnames):
        dname = Path(dname)
        results = proc(dname)
        for method, res in results:
            all_results.append((dname, method, res))
            # print(dname, method, colorize_result(res))

    # Create a 2D table with directories along the right and methods along the top
    # Collect all unique methods and directories
    methods = []
    for _, method, _ in all_results:
        if method not in methods:
            methods.append(method)

    directories = dnames

    # Sort directories by decreasing number of 'pass' results
    pass_counts = {
        dname: sum(
            res == "pass" for dname_result, _, res in all_results if str(dname) == str(dname_result)
        )
        for dname in directories
    }
    directories.sort(key=lambda dname: pass_counts[dname], reverse=True)

    # Create a results matrix
    results_matrix = {dname: {method: "" for method in methods} for dname in directories}

    # Populate the results matrix
    for dname, method, res in all_results:
        results_matrix[str(dname)][method] = res

    # Print the 2D table
    # Print the header
    print("{:<20}".format("Directory"), end="")
    for method in methods:
        print("{:<9}".format(method), end="")
    print()

    # Print the rows with colorized results
    for dname in directories:
        print("{:<20}".format(Path(dname).name), end="")
        for method in methods:
            res = results_matrix[dname][method]
            colorized_res = colorize_result(res)
            res_l = 9 + len(colorized_res) - len(res)
            fmt = "{:<" + str(res_l) + "}"
            print(fmt.format(colorized_res), end="")
        print()


if __name__ == "__main__":
    status = main(sys.argv[1:])
    sys.exit(status)

aider/coders/udiff_coder.py (new file, +395 lines)

@@ -0,0 +1,395 @@
import difflib
from itertools import groupby
from pathlib import Path

from ..dump import dump  # noqa: F401
from .base_coder import Coder
from .search_replace import (
    SearchTextNotUnique,
    all_preprocs,
    diff_lines,
    flexible_search_and_replace,
    search_and_replace,
)
from .udiff_prompts import UnifiedDiffPrompts

no_match_error = """UnifiedDiffNoMatch: hunk failed to apply!

{path} does not contain lines that match the diff you provided!
Try again.
DO NOT skip blank lines, comments, docstrings, etc!
The diff needs to apply cleanly to the lines in {path}!

{path} does not contain these {num_lines} exact lines in a row:
```
{original}```
"""


not_unique_error = """UnifiedDiffNotUnique: hunk failed to apply!

{path} contains multiple sets of lines that match the diff you provided!
Try again.
Use additional ` ` lines to provide context that uniquely indicates which code needs to be changed.
The diff needs to apply to a unique set of lines in {path}!

{path} contains multiple copies of these {num_lines} lines:
```
{original}```
"""


class UnifiedDiffCoder(Coder):
    edit_format = "udiff"

    def __init__(self, *args, **kwargs):
        self.gpt_prompts = UnifiedDiffPrompts()
        super().__init__(*args, **kwargs)

    def get_edits(self):
        content = self.partial_response_content

        # might raise ValueError for malformed ORIG/UPD blocks
        raw_edits = list(find_diffs(content))

        last_path = None
        edits = []
        for path, hunk in raw_edits:
            if path:
                last_path = path
            else:
                path = last_path
            edits.append((path, hunk))

        return edits

    def apply_edits(self, edits):
        seen = set()
        uniq = []
        for path, hunk in edits:
            hunk = normalize_hunk(hunk)
            if not hunk:
                continue

            this = [path + "\n"] + hunk
            this = "".join(this)

            if this in seen:
                continue
            seen.add(this)

            uniq.append((path, hunk))

        errors = []
        for path, hunk in uniq:
            full_path = self.abs_root_path(path)
            content = self.io.read_text(full_path)

            original, _ = hunk_to_before_after(hunk)

            try:
                content = do_replace(full_path, content, hunk)
            except SearchTextNotUnique:
                errors.append(
                    not_unique_error.format(
                        path=path, original=original, num_lines=len(original.splitlines())
                    )
                )
                continue

            if not content:
                errors.append(
                    no_match_error.format(
                        path=path, original=original, num_lines=len(original.splitlines())
                    )
                )
                continue

            # SUCCESS!
            self.io.write_text(full_path, content)

        if errors:
            errors = "\n\n".join(errors)
            raise ValueError(errors)


def do_replace(fname, content, hunk):
    fname = Path(fname)

    before_text, after_text = hunk_to_before_after(hunk)

    # does it want to make a new file?
    if not fname.exists() and not before_text.strip():
        fname.touch()
        content = ""

    if content is None:
        return

    # TODO: handle inserting into new file
    if not before_text.strip():
        # append to existing file, or start a new file
        new_content = content + after_text
        return new_content

    new_content = None

    new_content = apply_hunk(content, hunk)
    if new_content:
        return new_content


def collapse_repeats(s):
    return "".join(k for k, g in groupby(s))


def apply_hunk(content, hunk):
    before_text, after_text = hunk_to_before_after(hunk)

    res = directly_apply_hunk(content, hunk)
    if res:
        return res

    hunk = make_new_lines_explicit(content, hunk)

    # just consider space vs not-space
    ops = "".join([line[0] for line in hunk])
    ops = ops.replace("-", "x")
    ops = ops.replace("+", "x")
    ops = ops.replace("\n", " ")

    cur_op = " "
    section = []
    sections = []

    for i in range(len(ops)):
        op = ops[i]
        if op != cur_op:
            sections.append(section)
            section = []
            cur_op = op
        section.append(hunk[i])

    sections.append(section)
    if cur_op != " ":
        sections.append([])

    all_done = True
    for i in range(2, len(sections), 2):
        preceding_context = sections[i - 2]
        changes = sections[i - 1]
        following_context = sections[i]

        res = apply_partial_hunk(content, preceding_context, changes, following_context)
        if res:
            content = res
        else:
            all_done = False
            # FAILED!
            # this_hunk = preceding_context + changes + following_context
            break

    if all_done:
        return content


def flexi_just_search_and_replace(texts):
    strategies = [
        (search_and_replace, all_preprocs),
    ]

    return flexible_search_and_replace(texts, strategies)


def make_new_lines_explicit(content, hunk):
    before, after = hunk_to_before_after(hunk)

    diff = diff_lines(before, content)

    back_diff = []
    for line in diff:
        if line[0] == "+":
            continue
        # if line[0] == "-":
        #    line = "+" + line[1:]

        back_diff.append(line)

    new_before = directly_apply_hunk(before, back_diff)
    if not new_before:
        return hunk

    if len(new_before.strip()) < 10:
        return hunk

    before = before.splitlines(keepends=True)
    new_before = new_before.splitlines(keepends=True)
    after = after.splitlines(keepends=True)

    if len(new_before) < len(before) * 0.66:
        return hunk

    new_hunk = difflib.unified_diff(new_before, after, n=max(len(new_before), len(after)))
    new_hunk = list(new_hunk)[3:]

    return new_hunk


def cleanup_pure_whitespace_lines(lines):
    res = [
        line if line.strip() else line[-(len(line) - len(line.rstrip("\r\n"))) :] for line in lines
    ]
    return res


def normalize_hunk(hunk):
    before, after = hunk_to_before_after(hunk, lines=True)

    before = cleanup_pure_whitespace_lines(before)
    after = cleanup_pure_whitespace_lines(after)

    diff = difflib.unified_diff(before, after, n=max(len(before), len(after)))
    diff = list(diff)[3:]
    return diff


def directly_apply_hunk(content, hunk):
    before, after = hunk_to_before_after(hunk)

    before_lines, _ = hunk_to_before_after(hunk, lines=True)
    before_lines = "".join([line.strip() for line in before_lines])

    # Refuse to do a repeated search and replace on a tiny bit of non-whitespace context
    if len(before_lines) < 10 and content.count(before) > 1:
        return

    try:
        new_content = flexi_just_search_and_replace([before, after, content])
    except SearchTextNotUnique:
        new_content = None

    return new_content


def apply_partial_hunk(content, preceding_context, changes, following_context):
    len_prec = len(preceding_context)
    len_foll = len(following_context)

    use_all = len_prec + len_foll

    for drop in range(use_all):
        use = use_all - drop

        for use_prec in range(len_prec, -1, -1):
            if use_prec > use:
                continue

            use_foll = use - use_prec
            if use_foll > len_foll:
                continue

            if use_prec:
                this_prec = preceding_context[-use_prec:]
            else:
                this_prec = []

            this_foll = following_context[:use_foll]

            res = directly_apply_hunk(content, this_prec + changes + this_foll)
            if res:
                return res


def find_diffs(content):
    # We can always use triple-quotes, because all the udiff content
    # is prefixed with +/-/space.

    if not content.endswith("\n"):
        content = content + "\n"

    lines = content.splitlines(keepends=True)
    line_num = 0
    edits = []
    while line_num < len(lines):
        while line_num < len(lines):
            line = lines[line_num]
            if line.startswith("```diff"):
                line_num, these_edits = process_fenced_block(lines, line_num + 1)
                edits += these_edits
                break
            line_num += 1

    # For now, just take 1!
    # edits = edits[:1]

    return edits


def process_fenced_block(lines, start_line_num):
    for line_num in range(start_line_num, len(lines)):
        line = lines[line_num]
        if line.startswith("```"):
            break

    block = lines[start_line_num:line_num]
    block.append("@@ @@")

    if block[0].startswith("--- "):
        fname = block[0].split()[1]
        block = block[2:]
    else:
        fname = None

    edits = []

    keeper = False
    hunk = []
    op = " "
    for line in block:
        hunk.append(line)
        if len(line) < 2:
            continue
        op = line[0]
        if op in "-+":
            keeper = True
            continue
        if op != "@":
            continue
        if not keeper:
            hunk = []
            continue

        hunk = hunk[:-1]
        edits.append((fname, hunk))
        hunk = []

    return line_num + 1, edits


def hunk_to_before_after(hunk, lines=False):
    before = []
    after = []
    op = " "
    for line in hunk:
        if len(line) < 2:
            op = " "
            line = line
        else:
            op = line[0]
            line = line[1:]

        if op == " ":
            before.append(line)
            after.append(line)
        elif op == "-":
            before.append(line)
        elif op == "+":
            after.append(line)

    if lines:
        return before, after

    before = "".join(before)
    after = "".join(after)

    return before, after
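
# Example: for the hunk [" a\n", "-b\n", "+B\n", " c\n"],
# hunk_to_before_after returns ("a\nb\nc\n", "a\nB\nc\n").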

aider/coders/udiff_prompts.py (new file, +105 lines)

@@ -0,0 +1,105 @@
# flake8: noqa: E501

from .base_prompts import CoderPrompts


class UnifiedDiffPrompts(CoderPrompts):
    main_system = """Act as an expert software developer.
You are diligent and tireless, and you always COMPLETELY IMPLEMENT the needed code.
Always use best practices when coding.
Respect and use existing conventions, libraries, etc that are already present in the code base.

Take requests for changes to the supplied code.
If the request is ambiguous, ask questions.

For each file that needs to be changed, write out the changes similar to a unified diff like `diff -U0` would produce. For example:

# Example conversation 1

## USER: Replace is_prime with a call to sympy.

## ASSISTANT: Ok, I will:

1. Add an import of sympy.
2. Remove the is_prime() function.
3. Replace the existing call to is_prime() with a call to sympy.isprime().

Here are the diffs for those changes:

```diff
--- mathweb/flask/app.py
+++ mathweb/flask/app.py
@@ ... @@
-class MathWeb:
+import sympy
+
+class MathWeb:
@@ ... @@
-def is_prime(x):
-    if x < 2:
-        return False
-    for i in range(2, int(math.sqrt(x)) + 1):
-        if x % i == 0:
-            return False
-    return True
@@ ... @@
-@app.route('/prime/<int:n>')
-def nth_prime(n):
-    count = 0
-    num = 1
-    while count < n:
-        num += 1
-        if is_prime(num):
-            count += 1
-    return str(num)
+@app.route('/prime/<int:n>')
+def nth_prime(n):
+    count = 0
+    num = 1
+    while count < n:
+        num += 1
+        if sympy.isprime(num):
+            count += 1
+    return str(num)
```
"""

    system_reminder = """# File editing rules:

Return edits similar to unified diffs that `diff -U0` would produce.

Make sure you include the first 2 lines with the file paths.
Don't include timestamps with the file paths.

Start each hunk of changes with a `@@ ... @@` line.
Don't include line numbers like `diff -U0` does.
The user's patch tool doesn't need them.

The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file!
Think carefully and make sure you include and mark all lines that need to be removed or changed as `-` lines.
Make sure you mark all new or modified lines with `+`.
Don't leave out any lines or the diff patch won't apply correctly.

Indentation matters in the diffs!

Start a new hunk for each section of the file that needs changes.

Only output hunks that specify changes with `+` or `-` lines.
Skip any hunks that are entirely unchanging ` ` lines.

Output hunks in whatever order makes the most sense.
Hunks don't need to be in any particular order.

When editing a function, method, loop, etc use a hunk to replace the *entire* code block.
Delete the entire existing version with `-` lines and then add a new, updated version with `+` lines.
This will help you generate correct code and correct diffs.
"""

    files_content_prefix = "These are the *read-write* files:\n"

    files_no_full_files = "I am not sharing any *read-write* files yet."

    repo_content_prefix = """Below here are summaries of other files present in this git repository.
Do not propose changes to these files, they are *read-only*.
To make a file *read-write*, ask the user to *add it to the chat*.
"""

benchmark/refactor_tools.py (new executable file, +208 lines)

@@ -0,0 +1,208 @@
#!/usr/bin/env python

import ast
import os
import shutil
import sys
from pathlib import Path

from aider.dump import dump  # noqa: F401


class ParentNodeTransformer(ast.NodeTransformer):
    """
    This transformer sets the 'parent' attribute on each node.
    """

    def generic_visit(self, node):
        for child in ast.iter_child_nodes(node):
            child.parent = node
        return super(ParentNodeTransformer, self).generic_visit(node)


def verify_full_func_at_top_level(tree, func, func_children):
    func_node = next(
        (
            item
            for item in ast.walk(tree)
            if isinstance(item, ast.FunctionDef) and item.name == func
        ),
        None,
    )
    assert func_node is not None, f"Function {func} not found"

    assert isinstance(
        func_node.parent, ast.Module
    ), f"{func} is not a top level function, it has parent {func_node.parent}"

    num_children = sum(1 for _ in ast.walk(func_node))
    pct_diff_children = abs(num_children - func_children) * 100 / func_children
    assert (
        pct_diff_children < 10
    ), f"Old method had {func_children} children, new method has {num_children}"


def verify_old_class_children(tree, old_class, old_class_children):
    node = next(
        (
            item
            for item in ast.walk(tree)
            if isinstance(item, ast.ClassDef) and item.name == old_class
        ),
        None,
    )
    assert node is not None, f"Old class {old_class} not found"

    num_children = sum(1 for _ in ast.walk(node))

    pct_diff_children = abs(num_children - old_class_children) * 100 / old_class_children
    assert (
        pct_diff_children < 10
    ), f"Old class had {old_class_children} children, new class has {num_children}"


def verify_refactor(fname, func, func_children, old_class, old_class_children):
    with open(fname, "r") as file:
        file_contents = file.read()
    tree = ast.parse(file_contents)
    ParentNodeTransformer().visit(tree)  # Set parent attribute for all nodes

    verify_full_func_at_top_level(tree, func, func_children)

    verify_old_class_children(tree, old_class, old_class_children - func_children)


############################


class SelfUsageChecker(ast.NodeVisitor):
    def __init__(self):
        self.non_self_methods = []
        self.parent_class_name = None
        self.num_class_children = 0

    def visit_FunctionDef(self, node):
        # Check if the first argument is 'self' and if it's not used
        if node.args.args and node.args.args[0].arg == "self":
            self_used = any(
                isinstance(expr, ast.Name) and expr.id == "self"
                for stmt in node.body
                for expr in ast.walk(stmt)
            )
            super_used = any(
                isinstance(expr, ast.Name) and expr.id == "super"
                for stmt in node.body
                for expr in ast.walk(stmt)
            )
            if not self_used and not super_used:
                # Calculate the number of child nodes in the function
                num_child_nodes = sum(1 for _ in ast.walk(node))
                res = (
                    self.parent_class_name,
                    node.name,
                    self.num_class_children,
                    num_child_nodes,
                )
                self.non_self_methods.append(res)
        self.generic_visit(node)

    def visit_ClassDef(self, node):
        self.parent_class_name = node.name
        self.num_class_children = sum(1 for _ in ast.walk(node))
        self.generic_visit(node)


def find_python_files(path):
    if os.path.isfile(path) and path.endswith(".py"):
        return [path]
    elif os.path.isdir(path):
        py_files = []
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith(".py"):
                    full_path = os.path.join(root, file)
                    py_files.append(full_path)
        return py_files
    else:
        return []


def find_non_self_methods(path):
    python_files = find_python_files(path)
    non_self_methods = []
    for filename in python_files:
        with open(filename, "r") as file:
            node = ast.parse(file.read(), filename=filename)
        checker = SelfUsageChecker()
        checker.visit(node)
        for method in checker.non_self_methods:
            non_self_methods.append([filename] + list(method))

    return non_self_methods


def process(entry):
    fname, class_name, method_name, class_children, method_children = entry
    if method_children > class_children / 2:
        return
    if method_children < 100:
        return

    fname = Path(fname)
    if "test" in fname.stem:
        return

    print(f"{fname} {class_name} {method_name} {class_children} {method_children}")

    dname = Path("tmp.benchmarks/refactor-benchmark")
    dname.mkdir(exist_ok=True)

    dname = dname / f"{fname.stem}_{class_name}_{method_name}"
    dname.mkdir(exist_ok=True)

    shutil.copy(fname, dname / fname.name)

    docs_dname = dname / ".docs"
    docs_dname.mkdir(exist_ok=True)

    ins_fname = docs_dname / "instructions.md"
    ins_fname.write_text(f"""# Refactor {class_name}.{method_name}

Refactor the `{method_name}` method in the `{class_name}` class to be a stand alone, top level function.
Name the new function `{method_name}`, exactly the same name as the existing method.
Update any existing `self.{method_name}` calls to work with the new `{method_name}` function.
""")  # noqa: E501

    test_fname = dname / f"{fname.stem}_test.py"
    test_fname.write_text(f"""
import unittest
from benchmark.refactor_tools import verify_refactor
from pathlib import Path

class TheTest(unittest.TestCase):
    def test_{method_name}(self):
        fname = Path(__file__).parent / "{fname.name}"
        method = "{method_name}"
        method_children = {method_children}

        class_name = "{class_name}"
        class_children = {class_children}

        verify_refactor(fname, method, method_children, class_name, class_children)

if __name__ == "__main__":
    unittest.main()
""")


def main(paths):
    for path in paths:
        methods = find_non_self_methods(path)
        # methods = sorted(methods, key=lambda x: x[4])

        for method in methods:
            process(method)


if __name__ == "__main__":
    main(sys.argv[1:])

docs/unified-diffs.md (new file, +440 lines)

@@ -0,0 +1,440 @@
# Fixing GPT-4 Turbo laziness with unified diffs

Aider now asks GPT-4 Turbo to use [unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html) to edit your code when you request new features, improvements, bug fixes, test cases, etc.
This new support for unified diffs massively reduces GPT-4 Turbo's habit of being a "lazy" coder.

There are abundant anecdotes
about GPT-4 Turbo writing half completed code filled with comments that give
homework assignments to the user
like "...omitted for brevity..." or "...add logic here...".
Aider's new unified diff edit format significantly reduces this sort of lazy coding,
producing much better quantitative scores on a new "laziness benchmark".

Before trying to reduce laziness, I needed a way to quantify and measure
the problem.
I developed a new
benchmarking suite designed to both provoke and quantify lazy coding.
It consists of 39 python refactoring tasks,
which ask GPT to remove a non-trivial method from a class and make it
a stand alone function.

GPT-4 Turbo is prone to being lazy on this sort of task, because it's mostly a
"cut & paste" of code from one place in a file to another.
GPT often creates the new function with a body that is empty except for
a comment like
"...include the body of the original method...".

This new laziness benchmark produced the following results with `gpt-4-1106-preview`:

- **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format. This confirms the anecdotes that GPT-4 Turbo is quite lazy when coding, and serves as a baseline for comparison.
- **Aider's new unified diff edit format raised the score to 65%**.
- **A system prompt based on widely circulated folklore only scored 15%, same as the baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma".

The older `gpt-4-0613` also did better on the laziness benchmark by using unified diffs.
The benchmark was designed to work with large source code files, many of
which exceeded GPT-4's 8k context window.
This meant that 28% of tasks exhausted the context window and were marked as a fail,
significantly dragging down GPT-4's performance on the benchmark.

- **GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format.
- **Aider's new unified diff edit format raised GPT-4's score to 59%**.

Before settling on unified diffs,
I explored many other approaches to stop GPT-4 Turbo from eliding code
and replacing it with comments.
These efforts included prompts about being tireless and diligent,
use of OpenAI's function/tool calling capabilities and numerous variations on
aider's existing editing formats and other diff-like formats.
All in all, the results shared here reflect
an extensive investigation of possible solutions and
a large number of benchmarking runs of numerous varied approaches against
GPT-4 Turbo.

The result is aider's new support for a unified diff like
editing format which outperforms other potential solutions by a wide margin.
The rest of this article will describe aider's new refactoring benchmark
and the new unified diff editing format.
We will discuss some key design decisions involved in this new format,
and evaluate their significance using ablation experiments.


## Refactoring benchmark

Aider has long used a
[benchmark suite based on 133 Exercism python exercises]().
But these are mostly small coding problems,
usually requiring only a few dozen lines of code to solve.
GPT-4 Turbo was typically only lazy on 2-3 of these exercises:
the ones with the largest amount of code and which involved refactoring.
Rather than fully completing the refactor, GPT would often
just add a comment
referencing old code like
"...copy $USD formatting code here...".

Based on this observation, I set out to build a benchmark based on refactoring
a non-trivial amount of code from within fairly large source files.
To do this, I used python's `ast` module to analyze the
[Django repository]().

The goal was to search the Django repository to:

- Find source files that contain class methods which are non-trivial, having more than 100 AST nodes in their implementation.
- Focus on methods that are a smaller piece of a larger class, so they don't represent the bulk of the code in their class or the file. We want to find methods which are less than half the AST nodes present in their containing class.
- Find methods that do not make any use of their `self` parameter. This means they can be trivially refactored out of the class and turned into a stand-alone top-level function (a sketch of this check follows the list).
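
The `self`-usage test is the interesting part. As a rough sketch, simplified from the `SelfUsageChecker` class in `benchmark/refactor_tools.py` (added in this commit), with an illustrative helper name, it boils down to:

```python
import ast


def method_never_uses_self(method_node: ast.FunctionDef) -> bool:
    # Illustrative condensation of SelfUsageChecker.visit_FunctionDef.
    # The first argument must literally be named 'self'...
    if not (method_node.args.args and method_node.args.args[0].arg == "self"):
        return False
    # ...and no expression in the body may reference 'self' (or 'super').
    return not any(
        isinstance(expr, ast.Name) and expr.id in ("self", "super")
        for stmt in method_node.body
        for expr in ast.walk(stmt)
    )
```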

We can then turn each of these source files into a task for the benchmark,
using instructions like:

> Refactor the `_set_csrf_cookie` method in the `CsrfViewMiddleware` class to be a stand alone, top level function.
> Name the new function `_set_csrf_cookie`, exactly the same name as the existing method.
> Update any existing `self._set_csrf_cookie` calls to work with the new `_set_csrf_cookie` function.

A [simple python AST scanning script]() found 39 of these source files in the Django repository
and packaged them up as benchmark tasks using
the same format as Exercism exercises.

The tool also created a unit test for each task
which again uses the `ast` module to check that the refactor
was performed roughly correctly (the key check is sketched after this list):

- The updated source file must parse as correct python, without `SyntaxError` or `IndentationError` exceptions. This is a powerful check that will surface any mechanical errors made when attempting to edit the source code.
- The target method must now exist as a top-level function in the file.
- This new top-level function must contain approximately the same number of AST nodes as the original class method. This ensures that GPT didn't elide code and replace it with comments.
- The original class must still be present in the file, and it must be smaller by about the number of AST nodes of the method which was removed. This helps confirm that the method was removed from the class, without other significant modifications.
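
A minimal version of that node-count check, paraphrasing `verify_full_func_at_top_level` from `benchmark/refactor_tools.py` (the function and file names are real; this condensed helper is illustrative), might look like:

```python
import ast


def check_not_elided(tree: ast.Module, func: str, expected_nodes: int):
    # Find the refactored function at the top level of the module.
    func_node = next(
        (node for node in tree.body
         if isinstance(node, ast.FunctionDef) and node.name == func),
        None,
    )
    assert func_node is not None, f"{func} is not a top-level function"

    # If GPT elided the body and left a "..." comment, the AST shrinks
    # dramatically; insist the node count stays within 10% of the original.
    num_nodes = sum(1 for _ in ast.walk(func_node))
    assert abs(num_nodes - expected_nodes) * 100 / expected_nodes < 10
```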
|
||||||
|
|
||||||
|

To be clear, this is not a rigorous test that the refactor was performed correctly.
But it does serve as a basic sanity check that the refactor was essentially done as a cut & paste, without eliding any code as comments.
And it correlates well with other laziness metrics
gathered during benchmarking, like the
introduction of new comments that contain "...".

The result is a pragmatic benchmark suite that provokes, detects and quantifies laziness.


## Unified diff editing format

The design and implementation of aider's new unified diff editing format
helped clarify some general principles, which I think are applicable to any effective
GPT-4 code editing format:

- FAMILIAR - Choose an edit format that GPT is already familiar with.
- SIMPLE - Choose a simple format that avoids escaping, syntactic overhead and brittle specifiers like line numbers or line counts.
- HIGH LEVEL - Encourage GPT to structure edits as new versions of substantive code blocks (functions, methods, etc), not as a series of surgical/minimal changes to individual lines of code.
- FLEXIBLE - Strive to be maximally flexible when interpreting GPT's edit instructions.

A helpful shortcut here is to have empathy for GPT, and imagine you are on
the other end of the conversation being tasked with specifying code edits.
Would you want to hand type a properly escaped json data structure
to specify surgical insert, delete, replace operations on specific code line numbers?
Would you want a typo, off-by-one line number or flubbed escape character to trigger an error
and force you to start over?

GPT is quantitatively better at code editing when you reduce the
burden of formatting edits by using a familiar, simple, high level
and flexible editing format.

### Choose a familiar editing format

Unified diffs are perhaps the most commonly used format for showing
how source code files have been changed.
This is because it is the default output format of `git diff`:

```diff
$ git diff hello.py
...
--- a/hello.py
+++ b/hello.py
@@ -1,5 +1,5 @@
 def main(args):
     # show a greeting

-    print("Hello!")
+    print("Goodbye!")
     return
```

Choosing such a familiar, popular output format means that GPT has
seen *many* examples in its training data.
GPT has therefore been extensively trained to generate
text that conforms to the unified diff syntax.
We won't need to provide many details and examples
in the system prompt, as it knows this format by name.

Unified diffs are
usually intended to be consumed by the
[patch](https://www.gnu.org/software/diffutils/manual/html_node/Merging-with-patch.html)
program.
They need to *accurately* reflect the original and updated file contents,
otherwise the patch command will fail to apply the changes.
Having GPT specify changes in a well-known format that is usually consumed by a
fairly rigid program like patch
seems to discourage it from
leaving informal editing instructions in comments
and being lazy
about writing all the needed code.

With unified diffs, GPT acts more like it's writing textual data intended to be read by a program,
not talking to a person.


### Use a simple editing format

Aider's [previous benchmark results](https://aider.chat/docs/benchmarks.html) made
it clear that simple editing formats
work much better than complex ones.
Even though OpenAI provides extensive support for
structured formats like json and function calls,
GPT is worse at editing code if you use them.
I repeated these and many other similar benchmarks against GPT-4 Turbo,
and again reached these same conclusions.

Informally, this is probably because stuffing *source code* into JSON is complicated
and error prone.
It likely takes a lot of the model's attention to escape and wrap code
in JSON containers.
Wrapping the python code
`print("On Windows use \"C:\\\"")`
as valid json is pretty painful and error prone:
`"print(\"On Windows use \\\"C:\\\\\\\"\")"`.
Due to escaping issues, GPT's code is often syntactically incorrect when it's
unpacked from the JSON container, or the JSON decode just fails entirely.
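
You can reproduce this escaping blowup with python's own `json` module;
a tiny, purely illustrative demo:

```python
import json

# The python statement above, held here as an ordinary string.
code = 'print("On Windows use \\"C:\\\\\\"")'

print(code)              # print("On Windows use \"C:\\\"")
print(json.dumps(code))  # "print(\"On Windows use \\\"C:\\\\\\\"\")"
```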

On the other hand, the core of the unified diff format is extremely simple.
You include a hunk of the file that needs to be changed,
with every line prefixed by either a *space* ` `, a *plus* `+` or a *minus* `-`.
These markers indicate an unchanged line, a new line to add or an existing line to remove.
There is no escaping, and very little other structure needed
to create a unified diff.

A unified diff looks pretty much like the code it is modifying.

The one complicated piece is the line numbers found at the start
of each hunk that look something like this: `@@ -2,4 +3,5 @@`.
This example is from a
hunk that will change lines 2-4 in the original file
into what will become lines 3-5 in the updated file.

You've probably read a lot of unified diffs without ever
caring about these line numbers,
because the diffs are usually perfectly sensible without them.
This is good news, because we're going to discard these numbers.

GPT is terrible at working accurately with source code line numbers.
This is a general observation about any use of line
numbers in editing formats,
backed up by many quantitative benchmark
experiments.
Specifically regarding line numbers in unified diffs,
GPT is frequently off-by-one, or labels a hunk as
being line numbers 2-4 of the file when the hunk actually contains 6 lines, etc.
GPT-4 isn't even close to being able to consistently
produce valid
line number headers.
Doing so requires far too much attention to numerical details to ensure
correctness and self-consistency.

So aider tells GPT not to include line numbers.
Instead, aider just interprets each hunk from the unified diffs
as a search and replace operation:

This diff:

```diff
@@ ... @@
 def main(args):
     # show a greeting

-    print("Hello!")
+    print("Goodbye!")
     return
```

Means we want to search the original source file for all the
*space* ` ` and *minus* `-` lines from the hunk:

```python
def main(args):
    # show a greeting

    print("Hello!")
    return
```

And then replace them with all the *space* ` ` and *plus* `+` lines:

```python
def main(args):
    # show a greeting

    print("Goodbye!")
    return
```

Simple, right?
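
In code, the core of this interpretation fits in a few lines. Here's a minimal
sketch; `apply_hunk` is a hypothetical name, and real hunks need far more error
handling, as the "Be flexible" section below discusses:

```python
def apply_hunk(content, hunk_lines):
    # The space and minus lines are the text to search for...
    before = "".join(line[1:] for line in hunk_lines if line[0] in " -")
    # ...and the space and plus lines are the replacement text.
    after = "".join(line[1:] for line in hunk_lines if line[0] in " +")
    if before not in content:
        raise ValueError("hunk does not match the original file")
    return content.replace(before, after, 1)

original = 'def main(args):\n    # show a greeting\n\n    print("Hello!")\n    return\n'
hunk = [
    " def main(args):\n",
    "     # show a greeting\n",
    " \n",
    '-    print("Hello!")\n',
    '+    print("Goodbye!")\n',
    "     return\n",
]
print(apply_hunk(original, hunk))  # prints the "Goodbye!" version
```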

### Encourage high level edits

The example unified diffs we've seen so far have all been single line changes,
which makes them pretty easy to read and understand.
Consider this slightly more complex change, which renames the variable `n` to
`number`:

```diff
@@ ... @@
-def factorial(n):
+def factorial(number):
     "compute factorial"

-    if n == 0:
+    if number == 0:
         return 1
     else:
-        return n * factorial(n-1)
+        return number * factorial(number-1)
```

The following "high level diff" of the same
change is not as succinct as the minimal diff above,
but it is much easier to see two different coherent versions of the
`factorial()` function.

```diff
@@ ... @@
-def factorial(n):
-    "compute factorial"
-
-    if n == 0:
-        return 1
-    else:
-        return n * factorial(n-1)
+def factorial(number):
+    "compute factorial"
+
+    if number == 0:
+        return 1
+    else:
+        return number * factorial(number-1)
```

Aider's system prompt strongly encourages
GPT to produce this kind of high level diff, and provides a few examples.
GPT is much more successful at code editing
with the addition of this "high level diff" prompting.
It is better at producing correct diffs, which can be successfully
applied to the original file.

**Experiments without "high level diff" prompting
measure a 30-50% increase in editing errors,**
where diffs fail to apply or apply incorrectly and
produce invalid code.
Each such editing error causes a round trip back to GPT,
asking for better diffs.
These extra round trips slow down the pair programming experience
and increase token costs.

There are probably a couple of reasons why high level diffs
improve code editing performance:

- It is easier to produce diffs that both correctly match the original code and correctly produce the intended new code. There is less risk of getting confused while generating a rapid fire series of minimal, surgical edits mixed into existing code.
- The high level hunks often contain more lines than a surgical version, so they are less likely to accidentally match unrelated parts of the original file. This is important because GPT can't reliably give us line numbers to specify exactly where in the file to make the change (demonstrated below).
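
The second point is easy to see with a toy example, illustrative only: a
surgical one-line search text can match many places in a file, while a larger,
coherent block usually locates a unique spot.

```python
source = """def a():
    return

def b():
    return
"""

# A single surgical line is ambiguous: it matches twice.
print(source.count("    return\n"))            # 2

# A high level block containing the same line matches exactly once.
print(source.count("def b():\n    return\n"))  # 1
```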

### Be flexible when applying edits

GPT frequently makes errors when generating diffs, which
can prevent them from being correctly
applied as edits to the source files.
These imperfect diffs exhibit a variety of problems:

- GPT forgets to include some semantically irrelevant lines or details. Often GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change.
- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file, and incorrectly includes them with a leading *space* ` `.
- GPT jumps ahead to a new part of the file without starting a new hunk with a `@@ ... @@` divider.

As an example of the first issue, consider this source code:

```python
import sys

def main(args):
    # show a greeting

    print("Hello!")
    return

main(sys.argv[1:])
```

GPT might produce a unified diff like the one below,
which is missing the "show a greeting" comment line.
When we search for the *minus* `-` lines, we won't find them
in the original file
because of the missing comment.

```diff
@@ ... @@
-def main(args):
-
-    print("Hello!")
-    return
+def main(args):
+
+    print("Goodbye!")
+    return
```


Aider tries to be very flexible when applying unified diffs,
in order to handle all these sorts of defects.
If a hunk doesn't apply cleanly, aider uses a number of strategies
to try and apply the edit intended by GPT:

- Normalize the hunk, by taking the *minus* `-` and *space* ` ` lines as one version of the hunk and the *space* ` ` and *plus* `+` lines as a second version and doing an actual unified diff on them (sketched below).
- Try and discover new lines that GPT is trying to add but which it forgot to mark with *plus* `+` markers. This is done by diffing the *minus* `-` and *space* ` ` lines back against the original file.
- Break a large hunk apart into an overlapping sequence of smaller hunks, which each contain only one contiguous run of *plus* `+` and *minus* `-` lines. Try and apply each of these sub-hunks independently.
- Vary the size and offset of the "context window" of *space* ` ` lines from the hunk that are used to localize the edit to a specific part of the file.
- Combine the above mechanisms to progressively become more permissive about how to apply the hunk.
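
As a rough illustration of the first strategy, a hunk can be normalized by
recovering the two file versions it implies and re-diffing them with python's
`difflib`. This is a sketch, not aider's actual implementation:

```python
import difflib

def normalize_hunk(hunk_lines):
    # Recover the "before" and "after" versions implied by the hunk.
    before = [line[1:] for line in hunk_lines if line[0] in " -"]
    after = [line[1:] for line in hunk_lines if line[0] in " +"]

    # Re-diff them with enough context to keep everything in one hunk,
    # which cleans up mislabeled or inconsistent line markers.
    diff = difflib.unified_diff(before, after, n=max(len(before), len(after)))

    # Drop the ---/+++ file headers and @@ line that unified_diff prepends.
    return list(diff)[3:]
```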

These flexible patching strategies are critical to successfully apply the
unified diffs that GPT produces.
Removing support for flexible patching
radically increases the number of hunks which fail to apply.
Each such editing error causes a round trip back to GPT,
asking for better diffs.
These extra round trips slow down the pair programming experience
and increase token costs.

**Experiments where flexible patching is disabled** quantify the importance of this
feature:

- **GPT-4 Turbo's performance drops from 65% down to 56%** on the refactoring benchmark.
- **We see a 9X increase in editing errors** on aider's original Exercism benchmark.

## Conclusions and future work

Aider's new unified diff format seems very effective at stopping
GPT-4 Turbo from being a lazy coder.

I suspect that anyone who has tried to have GPT edit code
started out asking for diffs of some kind.
I know I did.
Any naive attempt to use actual unified diffs
or any other strict diff format
is certainly doomed,
but the techniques described here and
now incorporated into aider provide
a highly effective solution.

There could be significant benefits to
fine tuning models on
the simpler, high level style of diffs that are described here.
Dropping the line numbers and focusing on diffs of
semantically coherent chunks of code
seems to be an important part of successful GPT code editing.
Most LLMs will have already seen plenty of unified diffs
in their normal training data, and so should be
very amenable to fine tuning towards this
particular style of diff.