mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 08:44:59 +00:00
works
This commit is contained in:
parent
a0f03ab0ce
commit
26ebc715eb
2 changed files with 75 additions and 40 deletions
|
@ -52,13 +52,7 @@ def prep(content):
|
||||||
return content, lines
|
return content, lines
|
||||||
|
|
||||||
|
|
||||||
def replace_most_similar_chunk(whole, part, replace):
|
def perfect_or_whitespace(whole_lines, part_lines, replace_lines):
|
||||||
"""Best efforts to find the `part` lines in `whole` and replace them with `replace`"""
|
|
||||||
|
|
||||||
whole, whole_lines = prep(whole)
|
|
||||||
part, part_lines = prep(part)
|
|
||||||
replace, replace_lines = prep(replace)
|
|
||||||
|
|
||||||
# Try for a perfect match
|
# Try for a perfect match
|
||||||
if part_lines in whole_lines:
|
if part_lines in whole_lines:
|
||||||
updated_lines = whole_lines.replace(part_lines, replace_lines)
|
updated_lines = whole_lines.replace(part_lines, replace_lines)
|
||||||
|
@ -69,6 +63,26 @@ def replace_most_similar_chunk(whole, part, replace):
|
||||||
if res:
|
if res:
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def replace_most_similar_chunk(whole, part, replace):
|
||||||
|
"""Best efforts to find the `part` lines in `whole` and replace them with `replace`"""
|
||||||
|
|
||||||
|
whole, whole_lines = prep(whole)
|
||||||
|
part, part_lines = prep(part)
|
||||||
|
replace, replace_lines = prep(replace)
|
||||||
|
|
||||||
|
res = perfect_or_whitespace(whole_lines, part_lines, replace_lines)
|
||||||
|
if res:
|
||||||
|
return res
|
||||||
|
|
||||||
|
# drop leading empty line, GPT sometimes adds them spuriously (issue #25)
|
||||||
|
if len(part_lines) > 2 and not part_lines[0].strip():
|
||||||
|
skip_blank_line_part_lines = part_lines[1:]
|
||||||
|
res = perfect_or_whitespace(whole_lines, skip_blank_line_part_lines, replace_lines)
|
||||||
|
if res:
|
||||||
|
dump(repr(res))
|
||||||
|
return res
|
||||||
|
|
||||||
# Try to handle when it elides code with ...
|
# Try to handle when it elides code with ...
|
||||||
try:
|
try:
|
||||||
res = try_dotdotdots(whole, part, replace)
|
res = try_dotdotdots(whole, part, replace)
|
||||||
|
@ -78,7 +92,9 @@ def replace_most_similar_chunk(whole, part, replace):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Try fuzzy matching
|
# Try fuzzy matching
|
||||||
return replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
|
res = replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
|
||||||
|
if res:
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
def try_dotdotdots(whole, part, replace):
|
def try_dotdotdots(whole, part, replace):
|
||||||
|
@ -135,53 +151,72 @@ def try_dotdotdots(whole, part, replace):
|
||||||
|
|
||||||
|
|
||||||
def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines):
|
def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines):
|
||||||
dump(repr(part_lines), repr(replace_lines))
|
dump(whole_lines)
|
||||||
|
dump(repr(whole_lines))
|
||||||
|
dump(repr(part_lines))
|
||||||
|
dump(repr(replace_lines))
|
||||||
|
|
||||||
# GPT often messes up leading whitespace.
|
# GPT often messes up leading whitespace.
|
||||||
# It usually does it uniformly across the ORIG and UPD blocks.
|
# It usually does it uniformly across the ORIG and UPD blocks.
|
||||||
# Either omitting all leading whitespace, or including only some of it.
|
# Either omitting all leading whitespace, or including only some of it.
|
||||||
|
|
||||||
|
# Outdent everything in part_lines and replace_lines by the max fixed amount possible
|
||||||
leading = [len(p) - len(p.lstrip()) for p in part_lines if p.strip()] + [
|
leading = [len(p) - len(p.lstrip()) for p in part_lines if p.strip()] + [
|
||||||
len(p) - len(p.lstrip()) for p in replace_lines if p.strip()
|
len(p) - len(p.lstrip()) for p in replace_lines if p.strip()
|
||||||
]
|
]
|
||||||
|
|
||||||
# Outdent everything in part_lines and replace_lines by the max fixed amount possible
|
|
||||||
if leading and min(leading):
|
if leading and min(leading):
|
||||||
leading = min(leading)
|
num_leading = min(leading)
|
||||||
part_lines = [p[leading:] if p.strip() else p for p in part_lines]
|
part_lines = [p[num_leading:] if p.strip() else p for p in part_lines]
|
||||||
replace_lines = [p[leading:] if p.strip() else p for p in replace_lines]
|
replace_lines = [p[num_leading:] if p.strip() else p for p in replace_lines]
|
||||||
|
|
||||||
# TODO: this logic needs to be fixed
|
|
||||||
# if the max outdent still leaves space
|
|
||||||
if all((not pline or pline[0].isspace()) for pline in part_lines):
|
|
||||||
return
|
|
||||||
|
|
||||||
# can we find an exact match not including the leading whitespace
|
# can we find an exact match not including the leading whitespace
|
||||||
for i in range(len(whole_lines) - len(part_lines) + 1):
|
num_part_lines = len(part_lines)
|
||||||
leading_whitespace = ""
|
|
||||||
for j, c in enumerate(whole_lines[i]):
|
|
||||||
if c == part_lines[0][0]:
|
|
||||||
leading_whitespace = whole_lines[i][:j]
|
|
||||||
break
|
|
||||||
|
|
||||||
if not leading_whitespace or not all(c.isspace() for c in leading_whitespace):
|
dump(part_lines)
|
||||||
continue
|
dump(replace_lines)
|
||||||
|
|
||||||
matched = all(
|
for i in range(len(whole_lines) - num_part_lines + 1):
|
||||||
whole_lines[i + k].startswith(leading_whitespace + part_lines[k])
|
add_leading = match_but_for_leading_whitespace(
|
||||||
for k in range(len(part_lines))
|
whole_lines[i : i + num_part_lines], part_lines
|
||||||
)
|
)
|
||||||
|
|
||||||
if matched:
|
if add_leading is None:
|
||||||
replace_lines = [
|
continue
|
||||||
leading_whitespace + rline if rline else rline for rline in replace_lines
|
|
||||||
]
|
dump(len(add_leading))
|
||||||
whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + len(part_lines) :]
|
|
||||||
return "".join(whole_lines)
|
replace_lines = [add_leading + rline if rline.strip() else rline for rline in replace_lines]
|
||||||
|
whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + num_part_lines :]
|
||||||
|
dump(repr(whole_lines))
|
||||||
|
return "".join(whole_lines)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def match_but_for_leading_whitespace(whole_lines, part_lines):
|
||||||
|
dump(whole_lines, part_lines)
|
||||||
|
|
||||||
|
num = len(whole_lines)
|
||||||
|
|
||||||
|
# does the non-whitespace all agree?
|
||||||
|
if not all(whole_lines[i].lstrip() == part_lines[i].lstrip() for i in range(num)):
|
||||||
|
return
|
||||||
|
|
||||||
|
# are they all offset the same?
|
||||||
|
add = set(
|
||||||
|
whole_lines[i][: len(whole_lines[i]) - len(part_lines[i])]
|
||||||
|
for i in range(num)
|
||||||
|
if whole_lines[i].strip()
|
||||||
|
)
|
||||||
|
|
||||||
|
dump(add)
|
||||||
|
if len(add) != 1:
|
||||||
|
return
|
||||||
|
|
||||||
|
return add.pop()
|
||||||
|
|
||||||
|
|
||||||
def replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines):
|
def replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines):
|
||||||
similarity_thresh = 0.8
|
similarity_thresh = 0.8
|
||||||
|
|
||||||
|
|
|
@ -246,14 +246,14 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output
|
||||||
|
|
||||||
def test_replace_part_with_just_some_missing_leading_whitespace(self):
|
def test_replace_part_with_just_some_missing_leading_whitespace(self):
|
||||||
whole = " line1\n line2\n line3\n"
|
whole = " line1\n line2\n line3\n"
|
||||||
part = " line1\n line2\n"
|
part = " line1\n line2\n"
|
||||||
replace = "new_line1\nnew_line2\n"
|
replace = " new_line1\n new_line2\n"
|
||||||
expected_output = " new_line1\n new_line2\n line3\n"
|
expected_output = " new_line1\n new_line2\n line3\n"
|
||||||
|
|
||||||
result = eb.replace_most_similar_chunk(whole, part, replace)
|
result = eb.replace_most_similar_chunk(whole, part, replace)
|
||||||
self.assertEqual(result, expected_output)
|
self.assertEqual(result, expected_output)
|
||||||
|
|
||||||
def test_replace_part_with_missing_leading_whitespace_including_blank_lines(self):
|
def test_replace_part_with_missing_leading_whitespace_including_blank_line(self):
|
||||||
"""
|
"""
|
||||||
The part has leading whitespace on all lines, so should be ignored.
|
The part has leading whitespace on all lines, so should be ignored.
|
||||||
But it has a *blank* line with no whitespace at all, which was causing a
|
But it has a *blank* line with no whitespace at all, which was causing a
|
||||||
|
@ -261,8 +261,8 @@ These changes replace the `subprocess.run` patches with `subprocess.check_output
|
||||||
"""
|
"""
|
||||||
whole = " line1\n line2\n line3\n"
|
whole = " line1\n line2\n line3\n"
|
||||||
part = "\n line1\n line2\n"
|
part = "\n line1\n line2\n"
|
||||||
replace = "new_line1\nnew_line2\n"
|
replace = " new_line1\n new_line2\n"
|
||||||
expected_output = None
|
expected_output = " new_line1\n new_line2\n line3\n"
|
||||||
|
|
||||||
result = eb.replace_most_similar_chunk(whole, part, replace)
|
result = eb.replace_most_similar_chunk(whole, part, replace)
|
||||||
self.assertEqual(result, expected_output)
|
self.assertEqual(result, expected_output)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue