"""Search/replace ("diff") edit format for aider.

Parses SEARCH/REPLACE blocks out of an LLM response and applies them to
files, with progressively more lenient matching strategies (exact match,
leading-whitespace-flexible match, ``...`` elision handling).
"""

import difflib
import math
import re
import sys
from difflib import SequenceMatcher
from pathlib import Path

from aider import utils

from ..dump import dump  # noqa: F401
from .base_coder import Coder
from .editblock_prompts import EditBlockPrompts


class EditBlockCoder(Coder):
    """A coder that uses search/replace blocks for code modifications."""

    edit_format = "diff"
    gpt_prompts = EditBlockPrompts()

    def get_edits(self):
        """Parse the model's response into (path, original, updated) edit tuples.

        Raises ValueError for malformed ORIG/UPD blocks.
        """
        content = self.partial_response_content

        # might raise ValueError for malformed ORIG/UPD blocks
        edits = list(find_original_update_blocks(content, self.fence))

        return edits

    def apply_edits(self, edits):
        """Apply each edit to its file; raise ValueError describing any failures.

        An edit that does not match its stated file is retried against every
        other file in the chat before being declared failed.
        """
        failed = []
        passed = []
        for edit in edits:
            path, original, updated = edit
            full_path = self.abs_root_path(path)
            content = self.io.read_text(full_path)
            new_content = do_replace(full_path, content, original, updated, self.fence)
            if not new_content:
                # try patching any of the other files in the chat
                for full_path in self.abs_fnames:
                    content = self.io.read_text(full_path)
                    new_content = do_replace(full_path, content, original, updated, self.fence)
                    if new_content:
                        break

            if new_content:
                self.io.write_text(full_path, new_content)
                passed.append(edit)
            else:
                failed.append(edit)

        if not failed:
            return

        blocks = "block" if len(failed) == 1 else "blocks"

        res = f"# {len(failed)} SEARCH/REPLACE {blocks} failed to match!\n"
        for edit in failed:
            path, original, updated = edit

            full_path = self.abs_root_path(path)
            content = self.io.read_text(full_path)

            res += f"""
## SearchReplaceNoExactMatch: This SEARCH block failed to exactly match lines in {path}
<<<<<<< SEARCH
{original}=======
{updated}>>>>>>> REPLACE

"""
            # Show the model the closest real lines, to help it self-correct.
            did_you_mean = find_similar_lines(original, content)
            if did_you_mean:
                res += f"""Did you mean to match some of these actual lines from {path}?

{self.fence[0]}
{did_you_mean}
{self.fence[1]}

"""

            if updated in content and updated:
                res += f"""Are you sure you need this SEARCH/REPLACE block?
The REPLACE lines are already in {path}!

"""
        res += (
            "The SEARCH section must exactly match an existing block of lines including all white"
            " space, comments, indentation, docstrings, etc\n"
        )
        if passed:
            pblocks = "block" if len(passed) == 1 else "blocks"
            res += f"""
# The other {len(passed)} SEARCH/REPLACE {pblocks} were applied successfully.
Don't re-send them.
Just reply with fixed versions of the {blocks} above that failed to match.
"""
        raise ValueError(res)


def prep(content):
    """Normalize trailing newline and return (content, lines-with-newlines)."""
    if content and not content.endswith("\n"):
        content += "\n"
    lines = content.splitlines(keepends=True)
    return content, lines


def perfect_or_whitespace(whole_lines, part_lines, replace_lines):
    """Exact match first, then retry tolerating uniform missing leading whitespace."""
    # Try for a perfect match
    res = perfect_replace(whole_lines, part_lines, replace_lines)
    if res:
        return res

    # Try being flexible about leading whitespace
    res = replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines)
    if res:
        return res


def perfect_replace(whole_lines, part_lines, replace_lines):
    """Replace the first exact occurrence of part_lines in whole_lines; None if absent."""
    part_tup = tuple(part_lines)
    part_len = len(part_lines)

    for i in range(len(whole_lines) - part_len + 1):
        whole_tup = tuple(whole_lines[i : i + part_len])
        if part_tup == whole_tup:
            res = whole_lines[:i] + replace_lines + whole_lines[i + part_len :]
            return "".join(res)


def replace_most_similar_chunk(whole, part, replace):
    """Best efforts to find the `part` lines in `whole` and replace them with `replace`"""

    whole, whole_lines = prep(whole)
    part, part_lines = prep(part)
    replace, replace_lines = prep(replace)

    res = perfect_or_whitespace(whole_lines, part_lines, replace_lines)
    if res:
        return res

    # drop leading empty line, GPT sometimes adds them spuriously (issue #25)
    if len(part_lines) > 2 and not part_lines[0].strip():
        skip_blank_line_part_lines = part_lines[1:]
        res = perfect_or_whitespace(whole_lines, skip_blank_line_part_lines, replace_lines)
        if res:
            return res

    # Try to handle when it elides code with ...
    try:
        res = try_dotdotdots(whole, part, replace)
        if res:
            return res
    except ValueError:
        pass

    # NOTE: the fuzzy-match fallback below is deliberately disabled by this
    # early return (kept for reference; it produced too many bad matches).
    return
    # Try fuzzy matching
    res = replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
    if res:
        return res


def try_dotdotdots(whole, part, replace):
    """
    See if the edit block has ... lines.
    If not, return none.

    If yes, try and do a perfect edit with the ... chunks.
    If there's a mismatch or otherwise imperfect edit, raise ValueError.

    If perfect edit succeeds, return the updated whole.
    """

    dots_re = re.compile(r"(^\s*\.\.\.\n)", re.MULTILINE | re.DOTALL)

    part_pieces = re.split(dots_re, part)
    replace_pieces = re.split(dots_re, replace)

    if len(part_pieces) != len(replace_pieces):
        raise ValueError("Unpaired ... in SEARCH/REPLACE block")

    if len(part_pieces) == 1:
        # no dots in this edit block, just return None
        return

    # Compare odd strings in part_pieces and replace_pieces
    all_dots_match = all(part_pieces[i] == replace_pieces[i] for i in range(1, len(part_pieces), 2))

    if not all_dots_match:
        raise ValueError("Unmatched ... in SEARCH/REPLACE block")

    part_pieces = [part_pieces[i] for i in range(0, len(part_pieces), 2)]
    replace_pieces = [replace_pieces[i] for i in range(0, len(replace_pieces), 2)]

    pairs = zip(part_pieces, replace_pieces)
    for part, replace in pairs:
        if not part and not replace:
            continue

        if not part and replace:
            # pure insertion: append to the end of the file
            if not whole.endswith("\n"):
                whole += "\n"
            whole += replace
            continue

        # each chunk must occur exactly once, or the edit is ambiguous
        if whole.count(part) == 0:
            raise ValueError
        if whole.count(part) > 1:
            raise ValueError

        whole = whole.replace(part, replace, 1)

    return whole


def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines):
    """Match part_lines ignoring a uniform leading-whitespace offset.

    GPT often messes up leading whitespace, usually uniformly across the
    ORIG and UPD blocks: either omitting all leading whitespace, or
    including only some of it.
    """

    # Outdent everything in part_lines and replace_lines by the max fixed amount possible
    leading = [len(p) - len(p.lstrip()) for p in part_lines if p.strip()] + [
        len(p) - len(p.lstrip()) for p in replace_lines if p.strip()
    ]

    if leading and min(leading):
        num_leading = min(leading)
        part_lines = [p[num_leading:] if p.strip() else p for p in part_lines]
        replace_lines = [p[num_leading:] if p.strip() else p for p in replace_lines]

    # can we find an exact match not including the leading whitespace
    num_part_lines = len(part_lines)

    for i in range(len(whole_lines) - num_part_lines + 1):
        add_leading = match_but_for_leading_whitespace(
            whole_lines[i : i + num_part_lines], part_lines
        )

        if add_leading is None:
            continue

        # re-indent the replacement to match the file's actual indentation
        replace_lines = [add_leading + rline if rline.strip() else rline for rline in replace_lines]
        whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + num_part_lines :]
        return "".join(whole_lines)

    return None


def match_but_for_leading_whitespace(whole_lines, part_lines):
    """Return the uniform leading-whitespace prefix to add, or None if no match."""
    num = len(whole_lines)

    # does the non-whitespace all agree?
    if not all(whole_lines[i].lstrip() == part_lines[i].lstrip() for i in range(num)):
        return

    # are they all offset the same?
    add = set(
        whole_lines[i][: len(whole_lines[i]) - len(part_lines[i])]
        for i in range(num)
        if whole_lines[i].strip()
    )

    if len(add) != 1:
        return

    return add.pop()


def replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines):
    """Fuzzy fallback: replace the chunk of whole_lines most similar to part.

    Scans chunks within ±10% of len(part_lines); requires similarity >= 0.8.
    """
    similarity_thresh = 0.8

    max_similarity = 0
    most_similar_chunk_start = -1
    most_similar_chunk_end = -1

    scale = 0.1
    min_len = math.floor(len(part_lines) * (1 - scale))
    max_len = math.ceil(len(part_lines) * (1 + scale))

    for length in range(min_len, max_len):
        for i in range(len(whole_lines) - length + 1):
            chunk = whole_lines[i : i + length]
            chunk = "".join(chunk)

            similarity = SequenceMatcher(None, chunk, part).ratio()

            if similarity > max_similarity and similarity:
                max_similarity = similarity
                most_similar_chunk_start = i
                most_similar_chunk_end = i + length

    if max_similarity < similarity_thresh:
        return

    modified_whole = (
        whole_lines[:most_similar_chunk_start]
        + replace_lines
        + whole_lines[most_similar_chunk_end:]
    )
    modified_whole = "".join(modified_whole)

    return modified_whole


DEFAULT_FENCE = ("`" * 3, "`" * 3)


def strip_quoted_wrapping(res, fname=None, fence=DEFAULT_FENCE):
    """
    Given an input string which may have extra "wrapping" around it, remove the wrapping.
    For example:

    filename.ext
    ```
    We just want this content
    Not the filename and triple quotes
    ```
    """
    if not res:
        return res

    res = res.splitlines()

    if fname and res[0].strip().endswith(Path(fname).name):
        res = res[1:]

    if res[0].startswith(fence[0]) and res[-1].startswith(fence[1]):
        res = res[1:-1]

    res = "\n".join(res)
    if res and res[-1] != "\n":
        res += "\n"

    return res


def do_replace(fname, content, before_text, after_text, fence=None):
    """Apply one SEARCH/REPLACE edit to content; return new content or None on failure."""
    before_text = strip_quoted_wrapping(before_text, fname, fence)
    after_text = strip_quoted_wrapping(after_text, fname, fence)
    fname = Path(fname)

    # does it want to make a new file?
    if not fname.exists() and not before_text.strip():
        fname.touch()
        content = ""

    if content is None:
        return

    if not before_text.strip():
        # append to existing file, or start a new file
        new_content = content + after_text
    else:
        new_content = replace_most_similar_chunk(content, before_text, after_text)

    return new_content


HEAD = "<<<<<<< SEARCH"
DIVIDER = "======="
UPDATED = ">>>>>>> REPLACE"

separators = "|".join([HEAD, DIVIDER, UPDATED])

split_re = re.compile(r"^((?:" + separators + r")[ ]*\n)", re.MULTILINE | re.DOTALL)

missing_filename_err = (
    "Bad/missing filename. The filename must be alone on the line before the opening fence"
    " {fence[0]}"
)


def strip_filename(filename, fence):
    """Clean decoration (fences, colons, backticks, bold) off a candidate filename line."""
    filename = filename.strip()

    if filename == "...":
        return

    start_fence = fence[0]
    if filename.startswith(start_fence):
        return

    filename = filename.rstrip(":")
    filename = filename.lstrip("#")
    filename = filename.strip()
    filename = filename.strip("`")
    filename = filename.strip("*")
    filename = filename.replace("\\_", "_")

    return filename


# NOTE(review): the body of find_original_update_blocks was lost/garbled in the
# source under review (the text jumps from its opening comment into the middle
# of find_similar_lines). It has been reconstructed from the known upstream
# aider implementation that matches the surrounding helpers — verify against
# version control before relying on it.
def find_original_update_blocks(content, fence=DEFAULT_FENCE):
    """Yield (filename, original, updated) for each SEARCH/REPLACE block in content.

    Raises ValueError, with the text processed so far, on malformed blocks.
    """
    # make sure we end with a newline, otherwise the regex will miss <<UPD on the last line
    if not content.endswith("\n"):
        content = content + "\n"

    pieces = re.split(split_re, content)

    pieces.reverse()
    processed = []

    # Keep using the same filename in cases where GPT produces an edit block
    # without a filename.
    current_filename = None
    try:
        while pieces:
            cur = pieces.pop()

            if cur in (DIVIDER, UPDATED):
                processed.append(cur)
                raise ValueError(f"Unexpected {cur}")

            if cur.strip() != HEAD:
                processed.append(cur)
                continue

            processed.append(cur)  # original_marker

            # the filename is on the last (or second-to-last) line before the HEAD marker
            filename = strip_filename(processed[-2].splitlines()[-1], fence)
            try:
                if not filename:
                    filename = strip_filename(processed[-2].splitlines()[-2], fence)
                if not filename:
                    filename = current_filename
                if not filename:
                    raise ValueError(missing_filename_err.format(fence=fence))
            except IndexError:
                if current_filename:
                    filename = current_filename
                else:
                    raise ValueError(missing_filename_err.format(fence=fence))

            current_filename = filename

            original_text = pieces.pop()
            processed.append(original_text)

            divider_marker = pieces.pop()
            processed.append(divider_marker)
            if divider_marker.strip() != DIVIDER:
                raise ValueError(f"Expected `{DIVIDER}` not {divider_marker.strip()}")

            updated_text = pieces.pop()
            processed.append(updated_text)

            updated_marker = pieces.pop()
            processed.append(updated_marker)
            if updated_marker.strip() != UPDATED:
                raise ValueError(f"Expected `{UPDATED}` not `{updated_marker.strip()}")

            yield filename, original_text, updated_text
    except ValueError as e:
        processed = "".join(processed)
        err = e.args[0]
        raise ValueError(f"{processed}\n^^^ {err}")
    except IndexError:
        processed = "".join(processed)
        raise ValueError(f"{processed}\n^^^ Incomplete SEARCH/REPLACE block.")
    except Exception:
        processed = "".join(processed)
        raise ValueError(f"{processed}\n^^^ Error parsing SEARCH/REPLACE block.")


# NOTE(review): the head of this function was also lost in the garbled region;
# the signature and scanning loop are reconstructed from upstream — verify.
def find_similar_lines(search_lines, content_lines, threshold=0.6):
    """Return the chunk of content_lines most similar to search_lines, or ""."""
    search_lines = search_lines.splitlines()
    content_lines = content_lines.splitlines()

    best_ratio = 0
    best_match = None

    for i in range(len(content_lines) - len(search_lines) + 1):
        chunk = content_lines[i : i + len(search_lines)]
        ratio = SequenceMatcher(None, search_lines, chunk).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_match = chunk
            best_match_i = i

    if best_ratio < threshold:
        return ""

    if best_match[0] == search_lines[0] and best_match[-1] == search_lines[-1]:
        return "\n".join(best_match)

    # expand the match with N lines of surrounding context
    N = 5
    best_match_end = min(len(content_lines), best_match_i + len(search_lines) + N)
    best_match_i = max(0, best_match_i - N)

    best = content_lines[best_match_i:best_match_end]
    return "\n".join(best)


def main():
    """Debug driver: dump the edit blocks found in a chat-history markdown file."""
    history_md = Path(sys.argv[1]).read_text()
    if not history_md:
        return

    messages = utils.split_chat_history_markdown(history_md)

    for msg in messages:
        msg = msg["content"]
        edits = list(find_original_update_blocks(msg))

        for fname, before, after in edits:
            # Compute diff
            diff = difflib.unified_diff(
                before.splitlines(keepends=True),
                after.splitlines(keepends=True),
                fromfile="before",
                tofile="after",
            )
            diff = "".join(diff)
            dump(before)
            dump(after)
            dump(diff)


if __name__ == "__main__":
    main()