From 5e63ce335285ea63b24ecdbf008a4926a8ba213a Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 20 Jun 2023 17:04:06 -0700 Subject: [PATCH] refac --- aider/coders/base.py | 1 - aider/coders/editblock.py | 313 +++++++++++++++++++++++++++++++++++++- aider/utils.py | 307 ------------------------------------- 3 files changed, 309 insertions(+), 312 deletions(-) diff --git a/aider/coders/base.py b/aider/coders/base.py index cc8f49ab1..31a7061c6 100755 --- a/aider/coders/base.py +++ b/aider/coders/base.py @@ -37,7 +37,6 @@ class Coder: from . import EditBlockCoder, WholeFileCoder if edit_format == "diff": - dump("here") return EditBlockCoder(*args, **kwargs) elif edit_format == "whole": return WholeFileCoder(*args, **kwargs) diff --git a/aider/coders/editblock.py b/aider/coders/editblock.py index 1e0d63bdd..f35b8d68a 100644 --- a/aider/coders/editblock.py +++ b/aider/coders/editblock.py @@ -1,8 +1,9 @@ +import math import os +import re +from difflib import SequenceMatcher from pathlib import Path -from aider import utils - from ..editors import EditBlockPrompts from .base import Coder @@ -17,7 +18,7 @@ class EditBlockCoder(Coder): def update_files(self, content): # might raise ValueError for malformed ORIG/UPD blocks - edits = list(utils.find_original_update_blocks(content)) + edits = list(find_original_update_blocks(content)) edited = set() for path, original, updated in edits: @@ -50,7 +51,7 @@ class EditBlockCoder(Coder): self.repo.git.add(full_path) edited.add(path) - if utils.do_replace(full_path, original, updated, self.dry_run): + if do_replace(full_path, original, updated, self.dry_run): if self.dry_run: self.io.tool_output(f"Dry run, did not apply edit to {path}") else: @@ -59,3 +60,307 @@ class EditBlockCoder(Coder): self.io.tool_error(f"Failed to apply edit to {path}") return edited + + +def try_dotdotdots(whole, part, replace): + """ + See if the edit block has ... lines. + If not, return none. + + If yes, try and do a perfect edit with the ... chunks. + If there's a mismatch or otherwise imperfect edit, raise ValueError. + + If perfect edit succeeds, return the updated whole. + """ + + dots_re = re.compile(r"(^\s*\.\.\.\n)", re.MULTILINE | re.DOTALL) + + part_pieces = re.split(dots_re, part) + replace_pieces = re.split(dots_re, replace) + + if len(part_pieces) != len(replace_pieces): + raise ValueError("Unpaired ... in edit block") + + if len(part_pieces) == 1: + # no dots in this edit block, just return None + return + + # Compare odd strings in part_pieces and replace_pieces + all_dots_match = all(part_pieces[i] == replace_pieces[i] for i in range(1, len(part_pieces), 2)) + + if not all_dots_match: + raise ValueError("Unmatched ... in edit block") + + part_pieces = [part_pieces[i] for i in range(0, len(part_pieces), 2)] + replace_pieces = [replace_pieces[i] for i in range(0, len(replace_pieces), 2)] + + pairs = zip(part_pieces, replace_pieces) + for part, replace in pairs: + if not part and not replace: + continue + + if not part and replace: + if not whole.endswith("\n"): + whole += "\n" + whole += replace + continue + + if whole.count(part) != 1: + raise ValueError( + "No perfect matching chunk in edit block with ... or part appears more than once" + ) + + whole = whole.replace(part, replace, 1) + + return whole + + +def replace_part_with_missing_leading_whitespace(whole, part, replace): + whole_lines = whole.splitlines() + part_lines = part.splitlines() + replace_lines = replace.splitlines() + + # If all lines in the part start with whitespace, then honor it. + # But GPT often outdents the part and replace blocks completely, + # thereby discarding the actual leading whitespace in the file. + if all((len(pline) > 0 and pline[0].isspace()) for pline in part_lines): + return + + for i in range(len(whole_lines) - len(part_lines) + 1): + leading_whitespace = "" + for j, c in enumerate(whole_lines[i]): + if c == part_lines[0][0]: + leading_whitespace = whole_lines[i][:j] + break + + if not leading_whitespace or not all(c.isspace() for c in leading_whitespace): + continue + + matched = all( + whole_lines[i + k].startswith(leading_whitespace + part_lines[k]) + for k in range(len(part_lines)) + ) + + if matched: + replace_lines = [ + leading_whitespace + rline if rline else rline for rline in replace_lines + ] + whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + len(part_lines) :] + return "\n".join(whole_lines) + "\n" + + return None + + +def replace_most_similar_chunk(whole, part, replace): + res = replace_part_with_missing_leading_whitespace(whole, part, replace) + if res: + return res + + if part in whole: + return whole.replace(part, replace) + + try: + res = try_dotdotdots(whole, part, replace) + except ValueError: + return + + if res: + return res + + similarity_thresh = 0.8 + + max_similarity = 0 + most_similar_chunk_start = -1 + most_similar_chunk_end = -1 + + whole_lines = whole.splitlines() + part_lines = part.splitlines() + + scale = 0.1 + min_len = math.floor(len(part_lines) * (1 - scale)) + max_len = math.ceil(len(part_lines) * (1 + scale)) + + for length in range(min_len, max_len): + for i in range(len(whole_lines) - length + 1): + chunk = whole_lines[i : i + length] + chunk = "\n".join(chunk) + + similarity = SequenceMatcher(None, chunk, part).ratio() + + if similarity > max_similarity and similarity: + max_similarity = similarity + most_similar_chunk_start = i + most_similar_chunk_end = i + length + + if max_similarity < similarity_thresh: + return + + replace_lines = replace.splitlines() + + modified_whole = ( + whole_lines[:most_similar_chunk_start] + + replace_lines + + whole_lines[most_similar_chunk_end:] + ) + modified_whole = "\n".join(modified_whole) + + if whole.endswith("\n"): + modified_whole += "\n" + + return modified_whole + + +def strip_quoted_wrapping(res, fname=None): + """ + Given an input string which may have extra "wrapping" around it, remove the wrapping. + For example: + + filename.ext + ``` + We just want this content + Not the filename and triple quotes + ``` + """ + if not res: + return res + + res = res.splitlines() + + if fname and res[0].strip().endswith(Path(fname).name): + res = res[1:] + + if res[0].startswith("```") and res[-1].startswith("```"): + res = res[1:-1] + + res = "\n".join(res) + if res and res[-1] != "\n": + res += "\n" + + return res + + +def do_replace(fname, before_text, after_text, dry_run=False): + before_text = strip_quoted_wrapping(before_text, fname) + after_text = strip_quoted_wrapping(after_text, fname) + fname = Path(fname) + + # does it want to make a new file? + if not fname.exists() and not before_text.strip(): + fname.touch() + + content = fname.read_text() + + if not before_text.strip(): + # append to existing file, or start a new file + new_content = content + after_text + else: + new_content = replace_most_similar_chunk(content, before_text, after_text) + if not new_content: + return + + if not dry_run: + fname.write_text(new_content) + + return True + + +ORIGINAL = "<<<<<<< ORIGINAL" +DIVIDER = "=======" +UPDATED = ">>>>>>> UPDATED" + +separators = "|".join([ORIGINAL, DIVIDER, UPDATED]) + +split_re = re.compile(r"^((?:" + separators + r")[ ]*\n)", re.MULTILINE | re.DOTALL) + + +def find_original_update_blocks(content): + # make sure we end with a newline, otherwise the regex will miss <>>>>>> UPDATED +``` + +Hope you like it! +""" + print(list(find_original_update_blocks(edit))) diff --git a/aider/utils.py b/aider/utils.py index c1053bec1..e23d6b866 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -1,159 +1,8 @@ -import math -import re -from difflib import SequenceMatcher from pathlib import Path from .dump import dump # noqa: F401 -def try_dotdotdots(whole, part, replace): - """ - See if the edit block has ... lines. - If not, return none. - - If yes, try and do a perfect edit with the ... chunks. - If there's a mismatch or otherwise imperfect edit, raise ValueError. - - If perfect edit succeeds, return the updated whole. - """ - - dots_re = re.compile(r"(^\s*\.\.\.\n)", re.MULTILINE | re.DOTALL) - - part_pieces = re.split(dots_re, part) - replace_pieces = re.split(dots_re, replace) - - if len(part_pieces) != len(replace_pieces): - raise ValueError("Unpaired ... in edit block") - - if len(part_pieces) == 1: - # no dots in this edit block, just return None - return - - # Compare odd strings in part_pieces and replace_pieces - all_dots_match = all(part_pieces[i] == replace_pieces[i] for i in range(1, len(part_pieces), 2)) - - if not all_dots_match: - raise ValueError("Unmatched ... in edit block") - - part_pieces = [part_pieces[i] for i in range(0, len(part_pieces), 2)] - replace_pieces = [replace_pieces[i] for i in range(0, len(replace_pieces), 2)] - - pairs = zip(part_pieces, replace_pieces) - for part, replace in pairs: - if not part and not replace: - continue - - if not part and replace: - if not whole.endswith("\n"): - whole += "\n" - whole += replace - continue - - if whole.count(part) != 1: - raise ValueError( - "No perfect matching chunk in edit block with ... or part appears more than once" - ) - - whole = whole.replace(part, replace, 1) - - return whole - - -def replace_part_with_missing_leading_whitespace(whole, part, replace): - whole_lines = whole.splitlines() - part_lines = part.splitlines() - replace_lines = replace.splitlines() - - # If all lines in the part start with whitespace, then honor it. - # But GPT often outdents the part and replace blocks completely, - # thereby discarding the actual leading whitespace in the file. - if all((len(pline) > 0 and pline[0].isspace()) for pline in part_lines): - return - - for i in range(len(whole_lines) - len(part_lines) + 1): - leading_whitespace = "" - for j, c in enumerate(whole_lines[i]): - if c == part_lines[0][0]: - leading_whitespace = whole_lines[i][:j] - break - - if not leading_whitespace or not all(c.isspace() for c in leading_whitespace): - continue - - matched = all( - whole_lines[i + k].startswith(leading_whitespace + part_lines[k]) - for k in range(len(part_lines)) - ) - - if matched: - replace_lines = [ - leading_whitespace + rline if rline else rline for rline in replace_lines - ] - whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + len(part_lines) :] - return "\n".join(whole_lines) + "\n" - - return None - - -def replace_most_similar_chunk(whole, part, replace): - res = replace_part_with_missing_leading_whitespace(whole, part, replace) - if res: - return res - - if part in whole: - return whole.replace(part, replace) - - try: - res = try_dotdotdots(whole, part, replace) - except ValueError: - return - - if res: - return res - - similarity_thresh = 0.8 - - max_similarity = 0 - most_similar_chunk_start = -1 - most_similar_chunk_end = -1 - - whole_lines = whole.splitlines() - part_lines = part.splitlines() - - scale = 0.1 - min_len = math.floor(len(part_lines) * (1 - scale)) - max_len = math.ceil(len(part_lines) * (1 + scale)) - - for length in range(min_len, max_len): - for i in range(len(whole_lines) - length + 1): - chunk = whole_lines[i : i + length] - chunk = "\n".join(chunk) - - similarity = SequenceMatcher(None, chunk, part).ratio() - - if similarity > max_similarity and similarity: - max_similarity = similarity - most_similar_chunk_start = i - most_similar_chunk_end = i + length - - if max_similarity < similarity_thresh: - return - - replace_lines = replace.splitlines() - - modified_whole = ( - whole_lines[:most_similar_chunk_start] - + replace_lines - + whole_lines[most_similar_chunk_end:] - ) - modified_whole = "\n".join(modified_whole) - - if whole.endswith("\n"): - modified_whole += "\n" - - return modified_whole - - def quoted_file(fname, display_fname, number=False): prompt = "\n" prompt += display_fname @@ -169,60 +18,6 @@ def quoted_file(fname, display_fname, number=False): return prompt -def strip_quoted_wrapping(res, fname=None): - """ - Given an input string which may have extra "wrapping" around it, remove the wrapping. - For example: - - filename.ext - ``` - We just want this content - Not the filename and triple quotes - ``` - """ - if not res: - return res - - res = res.splitlines() - - if fname and res[0].strip().endswith(Path(fname).name): - res = res[1:] - - if res[0].startswith("```") and res[-1].startswith("```"): - res = res[1:-1] - - res = "\n".join(res) - if res and res[-1] != "\n": - res += "\n" - - return res - - -def do_replace(fname, before_text, after_text, dry_run=False): - before_text = strip_quoted_wrapping(before_text, fname) - after_text = strip_quoted_wrapping(after_text, fname) - fname = Path(fname) - - # does it want to make a new file? - if not fname.exists() and not before_text.strip(): - fname.touch() - - content = fname.read_text() - - if not before_text.strip(): - # append to existing file, or start a new file - new_content = content + after_text - else: - new_content = replace_most_similar_chunk(content, before_text, after_text) - if not new_content: - return - - if not dry_run: - fname.write_text(new_content) - - return True - - def show_messages(messages, title=None): if title: print(title.upper(), "*" * 50) @@ -232,105 +27,3 @@ def show_messages(messages, title=None): content = msg["content"].splitlines() for line in content: print(role, line) - - -ORIGINAL = "<<<<<<< ORIGINAL" -DIVIDER = "=======" -UPDATED = ">>>>>>> UPDATED" - -separators = "|".join([ORIGINAL, DIVIDER, UPDATED]) - -split_re = re.compile(r"^((?:" + separators + r")[ ]*\n)", re.MULTILINE | re.DOTALL) - - -def find_original_update_blocks(content): - # make sure we end with a newline, otherwise the regex will miss <>>>>>> UPDATED -``` - -Hope you like it! -""" - print(list(find_original_update_blocks(edit)))