mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-28 16:25:00 +00:00
works
This commit is contained in:
parent
a0f03ab0ce
commit
26ebc715eb
2 changed files with 75 additions and 40 deletions
|
@ -52,13 +52,7 @@ def prep(content):
|
|||
return content, lines
|
||||
|
||||
|
||||
def replace_most_similar_chunk(whole, part, replace):
|
||||
"""Best efforts to find the `part` lines in `whole` and replace them with `replace`"""
|
||||
|
||||
whole, whole_lines = prep(whole)
|
||||
part, part_lines = prep(part)
|
||||
replace, replace_lines = prep(replace)
|
||||
|
||||
def perfect_or_whitespace(whole_lines, part_lines, replace_lines):
|
||||
# Try for a perfect match
|
||||
if part_lines in whole_lines:
|
||||
updated_lines = whole_lines.replace(part_lines, replace_lines)
|
||||
|
@ -69,6 +63,26 @@ def replace_most_similar_chunk(whole, part, replace):
|
|||
if res:
|
||||
return res
|
||||
|
||||
|
||||
def replace_most_similar_chunk(whole, part, replace):
|
||||
"""Best efforts to find the `part` lines in `whole` and replace them with `replace`"""
|
||||
|
||||
whole, whole_lines = prep(whole)
|
||||
part, part_lines = prep(part)
|
||||
replace, replace_lines = prep(replace)
|
||||
|
||||
res = perfect_or_whitespace(whole_lines, part_lines, replace_lines)
|
||||
if res:
|
||||
return res
|
||||
|
||||
# drop leading empty line, GPT sometimes adds them spuriously (issue #25)
|
||||
if len(part_lines) > 2 and not part_lines[0].strip():
|
||||
skip_blank_line_part_lines = part_lines[1:]
|
||||
res = perfect_or_whitespace(whole_lines, skip_blank_line_part_lines, replace_lines)
|
||||
if res:
|
||||
dump(repr(res))
|
||||
return res
|
||||
|
||||
# Try to handle when it elides code with ...
|
||||
try:
|
||||
res = try_dotdotdots(whole, part, replace)
|
||||
|
@ -78,7 +92,9 @@ def replace_most_similar_chunk(whole, part, replace):
|
|||
pass
|
||||
|
||||
# Try fuzzy matching
|
||||
return replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
|
||||
res = replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
|
||||
if res:
|
||||
return res
|
||||
|
||||
|
||||
def try_dotdotdots(whole, part, replace):
|
||||
|
@ -135,53 +151,72 @@ def try_dotdotdots(whole, part, replace):
|
|||
|
||||
|
||||
def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines):
|
||||
dump(repr(part_lines), repr(replace_lines))
|
||||
dump(whole_lines)
|
||||
dump(repr(whole_lines))
|
||||
dump(repr(part_lines))
|
||||
dump(repr(replace_lines))
|
||||
|
||||
# GPT often messes up leading whitespace.
|
||||
# It usually does it uniformly across the ORIG and UPD blocks.
|
||||
# Either omitting all leading whitespace, or including only some of it.
|
||||
|
||||
# Outdent everything in part_lines and replace_lines by the max fixed amount possible
|
||||
leading = [len(p) - len(p.lstrip()) for p in part_lines if p.strip()] + [
|
||||
len(p) - len(p.lstrip()) for p in replace_lines if p.strip()
|
||||
]
|
||||
|
||||
# Outdent everything in part_lines and replace_lines by the max fixed amount possible
|
||||
if leading and min(leading):
|
||||
leading = min(leading)
|
||||
part_lines = [p[leading:] if p.strip() else p for p in part_lines]
|
||||
replace_lines = [p[leading:] if p.strip() else p for p in replace_lines]
|
||||
|
||||
# TODO: this logic needs to be fixed
|
||||
# if the max outdent still leaves space
|
||||
if all((not pline or pline[0].isspace()) for pline in part_lines):
|
||||
return
|
||||
num_leading = min(leading)
|
||||
part_lines = [p[num_leading:] if p.strip() else p for p in part_lines]
|
||||
replace_lines = [p[num_leading:] if p.strip() else p for p in replace_lines]
|
||||
|
||||
# can we find an exact match not including the leading whitespace
|
||||
for i in range(len(whole_lines) - len(part_lines) + 1):
|
||||
leading_whitespace = ""
|
||||
for j, c in enumerate(whole_lines[i]):
|
||||
if c == part_lines[0][0]:
|
||||
leading_whitespace = whole_lines[i][:j]
|
||||
break
|
||||
num_part_lines = len(part_lines)
|
||||
|
||||
if not leading_whitespace or not all(c.isspace() for c in leading_whitespace):
|
||||
continue
|
||||
dump(part_lines)
|
||||
dump(replace_lines)
|
||||
|
||||
matched = all(
|
||||
whole_lines[i + k].startswith(leading_whitespace + part_lines[k])
|
||||
for k in range(len(part_lines))
|
||||
for i in range(len(whole_lines) - num_part_lines + 1):
|
||||
add_leading = match_but_for_leading_whitespace(
|
||||
whole_lines[i : i + num_part_lines], part_lines
|
||||
)
|
||||
|
||||
if matched:
|
||||
replace_lines = [
|
||||
leading_whitespace + rline if rline else rline for rline in replace_lines
|
||||
]
|
||||
whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + len(part_lines) :]
|
||||
return "".join(whole_lines)
|
||||
if add_leading is None:
|
||||
continue
|
||||
|
||||
dump(len(add_leading))
|
||||
|
||||
replace_lines = [add_leading + rline if rline.strip() else rline for rline in replace_lines]
|
||||
whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + num_part_lines :]
|
||||
dump(repr(whole_lines))
|
||||
return "".join(whole_lines)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def match_but_for_leading_whitespace(whole_lines, part_lines):
    """Return the uniform indent that turns `part_lines` into `whole_lines`.

    The two sequences are compared line by line. They match when every pair
    of lines is identical once leading whitespace is stripped, and every
    non-blank line of `whole_lines` is longer than its counterpart in
    `part_lines` by exactly the same leading prefix.

    Args:
        whole_lines: candidate window of lines from the file being edited.
        part_lines: lines to locate, possibly missing leading whitespace.

    Returns:
        The common prefix (possibly the empty string) to prepend to each
        part line to obtain the corresponding whole line, or None when the
        windows do not match. Callers must test the result with `is None`,
        because a successful match can return the falsy value "".
    """
    # Windows of different sizes can never line up; report "no match"
    # instead of raising IndexError.
    if len(whole_lines) != len(part_lines):
        return None

    pairs = list(zip(whole_lines, part_lines))

    # does the non-whitespace all agree?
    if any(whole.lstrip() != part.lstrip() for whole, part in pairs):
        return None

    # are they all offset the same?
    add = set()
    for whole, part in pairs:
        # Blank lines carry no indentation information.
        if not whole.strip():
            continue

        extra = len(whole) - len(part)
        if extra < 0:
            # The part line is indented deeper than the whole line. A negative
            # slice bound would cut from the end of the string and could yield
            # non-whitespace text as the "indent", so treat it as no match.
            return None

        add.add(whole[:extra])

    if len(add) != 1:
        return None

    return add.pop()
|
||||
|
||||
|
||||
def replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines):
|
||||
similarity_thresh = 0.8
|
||||
|
||||
|
|
|
@ -246,14 +246,14 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output
|
|||
|
||||
def test_replace_part_with_just_some_missing_leading_whitespace(self):
|
||||
whole = " line1\n line2\n line3\n"
|
||||
part = " line1\n line2\n"
|
||||
replace = "new_line1\nnew_line2\n"
|
||||
part = " line1\n line2\n"
|
||||
replace = " new_line1\n new_line2\n"
|
||||
expected_output = " new_line1\n new_line2\n line3\n"
|
||||
|
||||
result = eb.replace_most_similar_chunk(whole, part, replace)
|
||||
self.assertEqual(result, expected_output)
|
||||
|
||||
def test_replace_part_with_missing_leading_whitespace_including_blank_lines(self):
|
||||
def test_replace_part_with_missing_leading_whitespace_including_blank_line(self):
|
||||
"""
|
||||
The part has leading whitespace on all lines, so should be ignored.
|
||||
But it has a *blank* line with no whitespace at all, which was causing a
|
||||
|
@ -261,8 +261,8 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output
|
|||
"""
|
||||
whole = " line1\n line2\n line3\n"
|
||||
part = "\n line1\n line2\n"
|
||||
replace = "new_line1\nnew_line2\n"
|
||||
expected_output = None
|
||||
replace = " new_line1\n new_line2\n"
|
||||
expected_output = " new_line1\n new_line2\n line3\n"
|
||||
|
||||
result = eb.replace_most_similar_chunk(whole, part, replace)
|
||||
self.assertEqual(result, expected_output)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue