# aider/coders/editblock_coder.py
import difflib
import math
import re
import subprocess
import sys
from difflib import SequenceMatcher
from pathlib import Path

from aider import utils

from ..dump import dump  # noqa: F401
from .base_coder import Coder
from .editblock_prompts import EditBlockPrompts


class EditBlockCoder(Coder):
    """A coder that uses search/replace blocks for code modifications."""

    edit_format = "diff"
    gpt_prompts = EditBlockPrompts()
    def get_edits(self):
        content = self.partial_response_content

        # might raise ValueError for malformed ORIG/UPD blocks
        edits = list(find_original_update_blocks(content, self.fence))

        return edits
    def run_interactive_subprocess(self, command):
        # Output streams directly to the terminal; nothing is captured,
        # and the method always returns None.
        try:
            result = subprocess.run(
                command,
                text=True,
                shell=True,
                encoding=self.io.encoding,
                errors="replace",
            )
            if result.returncode == 0:
                return
            self.io.tool_error(f"Command '{command}' exited with status {result.returncode}")
        except Exception as e:
            self.io.tool_error(f"Error running command '{command}': {str(e)}")

        self.io.tool_output(f"To retry and share output with the LLM: /run {command}")
        self.io.tool_output("You can find this command in your input history with up-arrow.")
    def handle_shell_commands(self, commands_str):
        commands = commands_str.strip().splitlines()
        command_count = sum(
            1 for cmd in commands if cmd.strip() and not cmd.strip().startswith("#")
        )
        prompt = "Run shell command?" if command_count == 1 else "Run shell commands?"
        if not self.io.confirm_ask(
            prompt, subject="\n".join(commands), explicit_yes_required=True
        ):
            return

        for command in commands:
            command = command.strip()
            if not command or command.startswith("#"):
                continue

            self.io.tool_output()
            self.io.tool_output(f"Running {command}")
            # Add the command to input history
            self.io.add_to_input_history(f"/run {command}")
            # run_interactive_subprocess streams output to the terminal and
            # returns None, so there is no captured stdout to relay here.
            self.run_interactive_subprocess(command)
    def apply_edits(self, edits):
        failed = []
        passed = []

        for edit in edits:
            if edit[0] is None:
                # A (None, commands) tuple is a shell command block, not a file edit
                self.handle_shell_commands(edit[1])
                continue

            path, original, updated = edit
            full_path = self.abs_root_path(path)
            content = self.io.read_text(full_path)
            new_content = do_replace(full_path, content, original, updated, self.fence)
            if not new_content:
                # try patching any of the other files in the chat
                for full_path in self.abs_fnames:
                    content = self.io.read_text(full_path)
                    new_content = do_replace(full_path, content, original, updated, self.fence)
                    if new_content:
                        break

            if new_content:
                self.io.write_text(full_path, new_content)
                passed.append(edit)
            else:
                failed.append(edit)

        if not failed:
            return

        blocks = "block" if len(failed) == 1 else "blocks"

        res = f"# {len(failed)} SEARCH/REPLACE {blocks} failed to match!\n"
        for edit in failed:
            path, original, updated = edit

            full_path = self.abs_root_path(path)
            content = self.io.read_text(full_path)

            res += f"""
## SearchReplaceNoExactMatch: This SEARCH block failed to exactly match lines in {path}
<<<<<<< SEARCH
{original}=======
{updated}>>>>>>> REPLACE

"""
            did_you_mean = find_similar_lines(original, content)
            if did_you_mean:
                res += f"""Did you mean to match some of these actual lines from {path}?

{self.fence[0]}
{did_you_mean}
{self.fence[1]}

"""

            if updated in content and updated:
                res += f"""Are you sure you need this SEARCH/REPLACE block?
The REPLACE lines are already in {path}!

"""

        res += (
            "The SEARCH section must exactly match an existing block of lines including all white"
            " space, comments, indentation, docstrings, etc\n"
        )
        if passed:
            pblocks = "block" if len(passed) == 1 else "blocks"
            res += f"""
# The other {len(passed)} SEARCH/REPLACE {pblocks} were applied successfully.
Don't re-send them.
Just reply with fixed versions of the {blocks} above that failed to match.
"""
        raise ValueError(res)


def prep(content):
    if content and not content.endswith("\n"):
        content += "\n"
    lines = content.splitlines(keepends=True)
    return content, lines


def perfect_or_whitespace(whole_lines, part_lines, replace_lines):
    # Try for a perfect match
    res = perfect_replace(whole_lines, part_lines, replace_lines)
    if res:
        return res

    # Try being flexible about leading whitespace
    res = replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines)
    if res:
        return res


def perfect_replace(whole_lines, part_lines, replace_lines):
    part_tup = tuple(part_lines)
    part_len = len(part_lines)

    for i in range(len(whole_lines) - part_len + 1):
        whole_tup = tuple(whole_lines[i : i + part_len])
        if part_tup == whole_tup:
            res = whole_lines[:i] + replace_lines + whole_lines[i + part_len :]
            return "".join(res)


def replace_most_similar_chunk(whole, part, replace):
    """Best efforts to find the `part` lines in `whole` and replace them with `replace`"""

    whole, whole_lines = prep(whole)
    part, part_lines = prep(part)
    replace, replace_lines = prep(replace)

    res = perfect_or_whitespace(whole_lines, part_lines, replace_lines)
    if res:
        return res

    # drop leading empty line, GPT sometimes adds them spuriously (issue #25)
    if len(part_lines) > 2 and not part_lines[0].strip():
        skip_blank_line_part_lines = part_lines[1:]
        res = perfect_or_whitespace(whole_lines, skip_blank_line_part_lines, replace_lines)
        if res:
            return res

    # Try to handle when it elides code with ...
    try:
        res = try_dotdotdots(whole, part, replace)
        if res:
            return res
    except ValueError:
        pass

    return

    # NOTE: the bare `return` above makes the fuzzy-matching fallback below
    # unreachable; it is left here disabled.
    # Try fuzzy matching
    res = replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines)
    if res:
        return res
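
# Illustrative example (added commentary, not part of the original module):
# a SEARCH block whose leading indentation was dropped still matches, and the
# replacement is re-indented to fit the matched lines:
#
#   whole = "def f():\n    a = 1\n    return a\n"
#   replace_most_similar_chunk(whole, "a = 1\n", "a = 2\n")
#   # -> "def f():\n    a = 2\n    return a\n"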


def try_dotdotdots(whole, part, replace):
    """
    See if the edit block has ... lines.
    If not, return none.

    If yes, try and do a perfect edit with the ... chunks.
    If there's a mismatch or otherwise imperfect edit, raise ValueError.

    If perfect edit succeeds, return the updated whole.
    """

    dots_re = re.compile(r"(^\s*\.\.\.\n)", re.MULTILINE | re.DOTALL)

    part_pieces = re.split(dots_re, part)
    replace_pieces = re.split(dots_re, replace)

    if len(part_pieces) != len(replace_pieces):
        raise ValueError("Unpaired ... in SEARCH/REPLACE block")

    if len(part_pieces) == 1:
        # no dots in this edit block, just return None
        return

    # Compare odd strings in part_pieces and replace_pieces
    # (the capture group in dots_re puts the "..." separators at the odd indices)
    all_dots_match = all(part_pieces[i] == replace_pieces[i] for i in range(1, len(part_pieces), 2))

    if not all_dots_match:
        raise ValueError("Unmatched ... in SEARCH/REPLACE block")

    part_pieces = [part_pieces[i] for i in range(0, len(part_pieces), 2)]
    replace_pieces = [replace_pieces[i] for i in range(0, len(replace_pieces), 2)]

    pairs = zip(part_pieces, replace_pieces)
    for part, replace in pairs:
        if not part and not replace:
            continue

        if not part and replace:
            if not whole.endswith("\n"):
                whole += "\n"
            whole += replace
            continue

        if whole.count(part) == 0:
            raise ValueError
        if whole.count(part) > 1:
            raise ValueError

        whole = whole.replace(part, replace, 1)

    return whole
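
# Illustrative example (added commentary, not part of the original module):
#
#   try_dotdotdots("a\nb\nc\n", "a\n...\nc\n", "a2\n...\nc\n")
#   # -> "a2\nb\nc\n"
#
# The "..." lines must pair up between the SEARCH and REPLACE sections, and
# each literal chunk must occur exactly once in `whole`; otherwise ValueError.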


def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replace_lines):
    # GPT often messes up leading whitespace.
    # It usually does it uniformly across the ORIG and UPD blocks.
    # Either omitting all leading whitespace, or including only some of it.

    # Outdent everything in part_lines and replace_lines by the max fixed amount possible
    leading = [len(p) - len(p.lstrip()) for p in part_lines if p.strip()] + [
        len(p) - len(p.lstrip()) for p in replace_lines if p.strip()
    ]

    if leading and min(leading):
        num_leading = min(leading)
        part_lines = [p[num_leading:] if p.strip() else p for p in part_lines]
        replace_lines = [p[num_leading:] if p.strip() else p for p in replace_lines]

    # can we find an exact match not including the leading whitespace
    num_part_lines = len(part_lines)

    for i in range(len(whole_lines) - num_part_lines + 1):
        add_leading = match_but_for_leading_whitespace(
            whole_lines[i : i + num_part_lines], part_lines
        )

        if add_leading is None:
            continue

        replace_lines = [add_leading + rline if rline.strip() else rline for rline in replace_lines]
        whole_lines = whole_lines[:i] + replace_lines + whole_lines[i + num_part_lines :]
        return "".join(whole_lines)

    return None


def match_but_for_leading_whitespace(whole_lines, part_lines):
    num = len(whole_lines)

    # does the non-whitespace all agree?
    if not all(whole_lines[i].lstrip() == part_lines[i].lstrip() for i in range(num)):
        return

    # are they all offset the same?
    add = set(
        whole_lines[i][: len(whole_lines[i]) - len(part_lines[i])]
        for i in range(num)
        if whole_lines[i].strip()
    )

    if len(add) != 1:
        return

    return add.pop()
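
# Illustrative examples (added commentary, not part of the original module):
#
#   match_but_for_leading_whitespace(["    x = 1\n"], ["x = 1\n"])
#   # -> "    "  (every non-blank line is offset by the same prefix)
#
#   match_but_for_leading_whitespace(["  x\n", "    y\n"], ["x\n", "y\n"])
#   # -> None    (offsets differ between lines)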


def replace_closest_edit_distance(whole_lines, part, part_lines, replace_lines):
    # Currently unreachable: replace_most_similar_chunk returns before calling this.
    similarity_thresh = 0.8

    max_similarity = 0
    most_similar_chunk_start = -1
    most_similar_chunk_end = -1

    scale = 0.1
    min_len = math.floor(len(part_lines) * (1 - scale))
    max_len = math.ceil(len(part_lines) * (1 + scale))

    for length in range(min_len, max_len):
        for i in range(len(whole_lines) - length + 1):
            chunk = whole_lines[i : i + length]
            chunk = "".join(chunk)

            similarity = SequenceMatcher(None, chunk, part).ratio()

            if similarity > max_similarity:
                max_similarity = similarity
                most_similar_chunk_start = i
                most_similar_chunk_end = i + length

    if max_similarity < similarity_thresh:
        return

    modified_whole = (
        whole_lines[:most_similar_chunk_start]
        + replace_lines
        + whole_lines[most_similar_chunk_end:]
    )
    modified_whole = "".join(modified_whole)

    return modified_whole


DEFAULT_FENCE = ("`" * 3, "`" * 3)


def strip_quoted_wrapping(res, fname=None, fence=DEFAULT_FENCE):
    """
    Given an input string which may have extra "wrapping" around it, remove the wrapping.
    For example:

    filename.ext
    ```
    We just want this content
    Not the filename and triple quotes
    ```
    """
    if not res:
        return res

    res = res.splitlines()

    if fname and res[0].strip().endswith(Path(fname).name):
        res = res[1:]

    if res[0].startswith(fence[0]) and res[-1].startswith(fence[1]):
        res = res[1:-1]

    res = "\n".join(res)
    if res and res[-1] != "\n":
        res += "\n"

    return res
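
# Illustrative example (added commentary, not part of the original module):
#
#   strip_quoted_wrapping("filename.ext\n```\nhello\n```", "filename.ext")
#   # -> "hello\n"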


def do_replace(fname, content, before_text, after_text, fence=None):
    before_text = strip_quoted_wrapping(before_text, fname, fence)
    after_text = strip_quoted_wrapping(after_text, fname, fence)
    fname = Path(fname)

    # does it want to make a new file?
    if not fname.exists() and not before_text.strip():
        fname.touch()
        content = ""

    if content is None:
        return

    if not before_text.strip():
        # append to existing file, or start a new file
        new_content = content + after_text
    else:
        new_content = replace_most_similar_chunk(content, before_text, after_text)

    return new_content
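
# Illustrative example (added commentary, not part of the original module):
#
#   do_replace("app.py", "a\nb\n", "a\n", "A\n", DEFAULT_FENCE)
#   # -> "A\nb\n"
#
# An empty SEARCH section appends after the existing content, touching the
# file into existence first if it doesn't exist yet.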


HEAD = "<<<<<<< SEARCH"
DIVIDER = "======="
UPDATED = ">>>>>>> REPLACE"

separators = "|".join([HEAD, DIVIDER, UPDATED])

split_re = re.compile(r"^((?:" + separators + r")[ ]*\n)", re.MULTILINE | re.DOTALL)


missing_filename_err = (
    "Bad/missing filename. The filename must be alone on the line before the opening fence"
    " {fence[0]}"
)


def strip_filename(filename, fence):
    filename = filename.strip()

    if filename == "...":
        return

    start_fence = fence[0]
    if filename.startswith(start_fence):
        return

    filename = filename.rstrip(":")
    filename = filename.lstrip("#")
    filename = filename.strip()
    filename = filename.strip("`")
    filename = filename.strip("*")
    filename = filename.replace("\\_", "_")

    return filename
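
# Illustrative example (added commentary, not part of the original module):
#
#   strip_filename("# `src/app.py`:", DEFAULT_FENCE)
#   # -> "src/app.py"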


def find_original_update_blocks(content, fence=DEFAULT_FENCE):
    lines = content.splitlines(keepends=True)
    i = 0
    current_filename = None

    while i < len(lines):
        line = lines[i]

        # Check for shell code blocks
        shell_starts = [
            "```bash",
            "```sh",
            "```shell",
            "```cmd",
            "```batch",
            "```powershell",
            "```ps1",
            "```zsh",
            "```fish",
            "```ksh",
            "```csh",
            "```tcsh",
        ]
        if any(line.strip().startswith(start) for start in shell_starts):
            shell_content = []
            i += 1
            while i < len(lines) and not lines[i].strip().startswith("```"):
                shell_content.append(lines[i])
                i += 1
            if i < len(lines) and lines[i].strip().startswith("```"):
                i += 1  # Skip the closing ```

            yield None, "".join(shell_content)
            continue

        # Check for SEARCH/REPLACE blocks
        if line.strip() == HEAD:
            try:
                filename = find_filename(lines[max(0, i - 3) : i], fence)
                if not filename:
                    if current_filename:
                        filename = current_filename
                    else:
                        raise ValueError(missing_filename_err.format(fence=fence))

                current_filename = filename

                original_text = []
                i += 1
                while i < len(lines) and not lines[i].strip() == DIVIDER:
                    original_text.append(lines[i])
                    i += 1

                if i >= len(lines) or lines[i].strip() != DIVIDER:
                    raise ValueError(f"Expected `{DIVIDER}`")

                updated_text = []
                i += 1
                while i < len(lines) and not lines[i].strip() in (UPDATED, DIVIDER):
                    updated_text.append(lines[i])
                    i += 1

                if i >= len(lines) or lines[i].strip() not in (UPDATED, DIVIDER):
                    raise ValueError(f"Expected `{UPDATED}` or `{DIVIDER}`")

                yield filename, "".join(original_text), "".join(updated_text)
            except ValueError as e:
                processed = "".join(lines[: i + 1])
                err = e.args[0]
                raise ValueError(f"{processed}\n^^^ {err}")

        i += 1
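
# Illustrative example (added commentary, not part of the original module).
# A well-formed response parses into (filename, search, replace) tuples;
# shell blocks come through as (None, commands):
#
#   content = (
#       "app.py\n"
#       "```python\n"
#       "<<<<<<< SEARCH\n"
#       "x = 1\n"
#       "=======\n"
#       "x = 2\n"
#       ">>>>>>> REPLACE\n"
#       "```\n"
#   )
#   list(find_original_update_blocks(content))
#   # -> [("app.py", "x = 1\n", "x = 2\n")]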


def find_filename(lines, fence):
    """
    Deepseek Coder v2 has been doing this:

    ```python
    word_count.py
    ```
    ```python
    <<<<<<< SEARCH
    ...

    This is a more flexible search back for filenames.
    """
    # Go back through the 3 preceding lines
    lines.reverse()
    lines = lines[:3]

    for line in lines:
        # If we find a filename, done
        filename = strip_filename(line, fence)
        if filename:
            return filename

        # Only continue as long as we keep seeing fences
        if not line.startswith(fence[0]):
            return


def find_similar_lines(search_lines, content_lines, threshold=0.6):
    search_lines = search_lines.splitlines()
    content_lines = content_lines.splitlines()

    best_ratio = 0
    best_match = None
    best_match_i = None

    for i in range(len(content_lines) - len(search_lines) + 1):
        chunk = content_lines[i : i + len(search_lines)]
        ratio = SequenceMatcher(None, search_lines, chunk).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_match = chunk
            best_match_i = i

    if best_ratio < threshold:
        return ""

    if best_match[0] == search_lines[0] and best_match[-1] == search_lines[-1]:
        return "\n".join(best_match)

    N = 5
    best_match_end = min(len(content_lines), best_match_i + len(search_lines) + N)
    best_match_i = max(0, best_match_i - N)

    best = content_lines[best_match_i:best_match_end]
    return "\n".join(best)


def main():
    history_md = Path(sys.argv[1]).read_text()
    if not history_md:
        return

    messages = utils.split_chat_history_markdown(history_md)

    for msg in messages:
        msg = msg["content"]
        edits = list(find_original_update_blocks(msg))

        for edit in edits:
            if edit[0] is None:
                # shell command block: (None, commands) has no before/after to diff
                continue

            fname, before, after = edit

            # Compute diff
            diff = difflib.unified_diff(
                before.splitlines(keepends=True),
                after.splitlines(keepends=True),
                fromfile="before",
                tofile="after",
            )
            diff = "".join(diff)
            dump(before)
            dump(after)
            dump(diff)


if __name__ == "__main__":
    main()