From 004c0529c653f4518eb87dfe2b3e5cc09e8059b1 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 3 Aug 2023 14:56:29 -0300 Subject: [PATCH] keepends --- aider/coders/editblock_coder.py | 30 +++++++++++------------------- tests/test_editblock.py | 20 ++++++++++---------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/aider/coders/editblock_coder.py b/aider/coders/editblock_coder.py index 1da12f712..a9c4ace1e 100644 --- a/aider/coders/editblock_coder.py +++ b/aider/coders/editblock_coder.py @@ -98,12 +98,8 @@ def try_dotdotdots(whole, part, replace): return whole -def replace_part_with_missing_leading_whitespace(whole, part, replace): - whole_lines = whole.splitlines() - part_lines = part.splitlines() - replace_lines = replace.splitlines() - - dump(repr(part), repr(replace)) +def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines): + dump(repr(part_lines), repr(replace_lines)) # GPT often messes up leading whitespace. # It usually does it uniformly across the ORIG and UPD blocks. @@ -113,7 +109,7 @@ def replace_part_with_missing_leading_whitespace(whole, part, replace): len(p) - len(p.lstrip()) for p in replace_lines if p.strip() ] - # Outdent everything in part and replace by the max fixed amount possible + # Outdent everything in part_lines and replace_lines by the max fixed amount possible if leading and min(leading): leading = min(leading) part_lines = [p[leading:] if p.strip() else p for p in part_lines] @@ -145,13 +141,17 @@ def replace_part_with_missing_leading_whitespace(whole, part, replace): leading_whitespace + rline if rline else rline for rline in replace_lines ] whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + len(part_lines) :] - return "\n".join(whole_lines) + "\n" + return "".join(whole_lines) return None def replace_most_similar_chunk(whole, part, replace): - res = replace_part_with_missing_leading_whitespace(whole, part, replace) + whole_lines = whole.splitlines(keepends=True) + part_lines = part.splitlines(keepends=True) + replace_lines = replace.splitlines(keepends=True) + + res = replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines) if res: return res @@ -172,9 +172,6 @@ def replace_most_similar_chunk(whole, part, replace): most_similar_chunk_start = -1 most_similar_chunk_end = -1 - whole_lines = whole.splitlines() - part_lines = part.splitlines() - scale = 0.1 min_len = math.floor(len(part_lines) * (1 - scale)) max_len = math.ceil(len(part_lines) * (1 + scale)) @@ -182,7 +179,7 @@ def replace_most_similar_chunk(whole, part, replace): for length in range(min_len, max_len): for i in range(len(whole_lines) - length + 1): chunk = whole_lines[i : i + length] - chunk = "\n".join(chunk) + chunk = "".join(chunk) similarity = SequenceMatcher(None, chunk, part).ratio() @@ -194,17 +191,12 @@ def replace_most_similar_chunk(whole, part, replace): if max_similarity < similarity_thresh: return - replace_lines = replace.splitlines() - modified_whole = ( whole_lines[:most_similar_chunk_start] + replace_lines + whole_lines[most_similar_chunk_end:] ) - modified_whole = "\n".join(modified_whole) - - if whole.endswith("\n"): - modified_whole += "\n" + modified_whole = "".join(modified_whole) return modified_whole diff --git a/tests/test_editblock.py b/tests/test_editblock.py index 9a973c88b..d24f2198d 100644 --- a/tests/test_editblock.py +++ b/tests/test_editblock.py @@ -31,10 +31,10 @@ class TestUtils(unittest.TestCase): self.assertEqual(result, expected_output) def test_replace_most_similar_chunk_not_perfect_match(self): - whole = "This is a sample text.\nAnother line of text.\nYet another line." - part = "This was a sample text.\nAnother line of txt" - replace = "This is a replaced text.\nModified line of text." - expected_output = "This is a replaced text.\nModified line of text.\nYet another line." + whole = "This is a sample text.\nAnother line of text.\nYet another line.\n" + part = "This was a sample text.\nAnother line of txt\n" + replace = "This is a replaced text.\nModified line of text.\n" + expected_output = "This is a replaced text.\nModified line of text.\nYet another line.\n" result = eb.replace_most_similar_chunk(whole, part, replace) self.assertEqual(result, expected_output) @@ -237,8 +237,8 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output def test_replace_part_with_missing_leading_whitespace(self): whole = " line1\n line2\n line3\n" - part = "line1\nline2" - replace = "new_line1\nnew_line2" + part = "line1\nline2\n" + replace = "new_line1\nnew_line2\n" expected_output = " new_line1\n new_line2\n line3\n" result = eb.replace_most_similar_chunk(whole, part, replace) @@ -246,8 +246,8 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output def test_replace_part_with_just_some_missing_leading_whitespace(self): whole = " line1\n line2\n line3\n" - part = " line1\n line2" - replace = "new_line1\nnew_line2" + part = " line1\n line2\n" + replace = "new_line1\nnew_line2\n" expected_output = " new_line1\n new_line2\n line3\n" result = eb.replace_most_similar_chunk(whole, part, replace) @@ -260,8 +260,8 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output bug per issue #25. Test case to repro and confirm fix. """ whole = " line1\n line2\n line3\n" - part = "\n line1\n line2" - replace = "new_line1\nnew_line2" + part = "\n line1\n line2\n" + replace = "new_line1\nnew_line2\n" expected_output = None result = eb.replace_most_similar_chunk(whole, part, replace)