added missing files

This commit is contained in:
Paul Gauthier 2023-12-17 13:00:32 -08:00
parent 360846320f
commit 0d4b6b6885
5 changed files with 1918 additions and 0 deletions

770
aider/coders/search_replace.py Executable file
View file

@@ -0,0 +1,770 @@
#!/usr/bin/env python
import sys
from pathlib import Path
import git
from diff_match_patch import diff_match_patch
from tqdm import tqdm
from aider.dump import dump
from aider.utils import GitTemporaryDirectory
class RelativeIndenter:
    """Rewrites text files to have relative indentation, which involves
    reformatting the leading white space on lines.  This format makes
    it easier to search and apply edits to pairs of code blocks which
    may differ significantly in their overall level of indentation.

    It removes leading white space which is shared with the preceding
    line.

    Original:
    ```
            Foo # indented 8
                Bar # indented 4 more than the previous line
                Baz # same indent as the previous line
                Fob # same indent as the previous line
    ```

    Becomes:
    ```
            Foo # indented 8
        Bar # indented 4 more than the previous line
    Baz # same indent as the previous line
    Fob # same indent as the previous line
    ```

    If the current line is *less* indented than the previous line,
    uses a unicode character to indicate outdenting.

    Original
    ```
            Foo
                Bar
                Baz
            Fob # indented 4 less than the previous line
    ```

    Becomes:
    ```
            Foo
        Bar
    Baz
    ←←←←Fob # indented 4 less than the previous line
    ```

    This is a similar original to the last one, but every line has
    been uniformly outdented:
    ```
    Foo
        Bar
        Baz
    Fob # indented 4 less than the previous line
    ```

    It becomes this result, which is very similar to the previous
    result.  Only the white space on the first line differs.  From the
    word Foo onwards, it is identical to the previous result.
    ```
    Foo
        Bar
    Baz
    ←←←←Fob # indented 4 less than the previous line
    ```
    """

    def __init__(self, texts):
        """
        Based on the texts, choose a unicode character that isn't in any of them.
        """
        chars = set()
        for text in texts:
            chars.update(text)

        # BUGFIX: the marker must be a real character; an empty string is
        # "not in" any set of characters and would silently disable the
        # outdent-marker machinery.
        ARROW = "←"
        if ARROW not in chars:
            self.marker = ARROW
        else:
            # ARROW appears in the inputs; pick some other unused character.
            self.marker = self.select_unique_marker(chars)

    def select_unique_marker(self, chars):
        """Scan down from the top of the unicode range for a char unused by the texts."""
        for codepoint in range(0x10FFFF, 0x10000, -1):
            marker = chr(codepoint)
            if marker not in chars:
                return marker

        raise ValueError("Could not find a unique marker")

    def make_relative(self, text):
        """
        Transform text to use relative indents.

        The output has two lines for every input line: first the
        relative-indent prefix (extra indent chars, or outdent markers),
        then the line's content.  Raises ValueError if the text already
        contains the marker character.
        """
        if self.marker in text:
            # BUGFIX: this was not an f-string, so the message showed the
            # literal text "{self.marker}".
            raise ValueError(f"Text already contains the outdent marker: {self.marker}")

        lines = text.splitlines(keepends=True)

        output = []
        prev_indent = ""
        for line in lines:
            line_without_end = line.rstrip("\n\r")

            len_indent = len(line_without_end) - len(line_without_end.lstrip())
            indent = line[:len_indent]
            change = len_indent - len(prev_indent)
            if change > 0:
                cur_indent = indent[-change:]
            elif change < 0:
                cur_indent = self.marker * -change
            else:
                cur_indent = ""

            out_line = cur_indent + "\n" + line[len_indent:]
            output.append(out_line)
            prev_indent = indent

        res = "".join(output)
        return res

    def make_absolute(self, text):
        """
        Transform text from relative back to absolute indents.

        Consumes the (indent-line, content-line) pairs produced by
        make_relative().  Raises ValueError if any marker characters
        survive the round trip (which indicates a corrupted edit).
        """
        lines = text.splitlines(keepends=True)

        output = []
        prev_indent = ""
        for i in range(0, len(lines), 2):
            dent = lines[i].rstrip("\r\n")
            non_indent = lines[i + 1]

            if dent.startswith(self.marker):
                len_outdent = len(dent)
                cur_indent = prev_indent[:-len_outdent]
            else:
                cur_indent = prev_indent + dent

            if not non_indent.rstrip("\r\n"):
                out_line = non_indent  # don't indent a blank line
            else:
                out_line = cur_indent + non_indent

            output.append(out_line)
            prev_indent = cur_indent

        res = "".join(output)
        if self.marker in res:
            raise ValueError("Error transforming text back to absolute indents")

        return res
# The patches are created to change S->R.
# So all the patch offsets are relative to S.
# But O has a lot more content. So all the offsets are very wrong.
#
# But patch_apply() seems to imply that once patch N is located,
# then it adjusts the offset of the next patch.
#
# This is great, because once we sync up after a big gap the nearby
# patches are close to being located right.
# Except when indentation has been changed by GPT.
#
# It would help to use the diff trick to build map_S_offset_to_O_offset().
# Then update all the S offsets in the S->R patches to be O offsets.
# Do we also need to update the R offsets?
#
# What if this gets funky/wrong?
#


def map_patches(texts, patches, debug):
    """Remap patch offsets from search-text coordinates to original-text
    coordinates, using a character diff between the two.

    texts is (search_text, replace_text, original_text); patches were built
    against search_text.  Mutates and returns the same patch objects.
    """
    search_text, replace_text, original_text = texts

    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5

    diff_s_o = dmp.diff_main(search_text, original_text)
    # diff_r_s = dmp.diff_main(replace_text, search_text)

    # dmp.diff_cleanupSemantic(diff_s_o)
    # dmp.diff_cleanupEfficiency(diff_s_o)

    if debug:
        html = dmp.diff_prettyHtml(diff_s_o)
        Path("tmp.html").write_text(html)
        dump(len(search_text))
        dump(len(original_text))

    for patch in patches:
        start1 = patch.start1
        start2 = patch.start2

        # diff_xIndex translates a position in search_text to the
        # equivalent position in original_text via the S->O diff.
        patch.start1 = dmp.diff_xIndex(diff_s_o, start1)
        patch.start2 = dmp.diff_xIndex(diff_s_o, start2)

        if debug:
            print()
            print(start1, repr(search_text[start1 : start1 + 50]))
            print(patch.start1, repr(original_text[patch.start1 : patch.start1 + 50]))
            print(patch.diffs)
            print()

    return patches
# Sample text for manually exercising RelativeIndenter; the line text
# ("4 in", "8 in") names the indentation each line carries.  The leading
# whitespace was lost in extraction and is restored here to match.
example = """Left
Left
    4 in
    4 in
        8 in
    4 in
Left
"""

# Disabled ad-hoc demo: round-trips `example` through
# make_relative()/make_absolute() and dumps each stage.
"""
ri = RelativeIndenter([example])
dump(example)

rel_example = ri.make_relative(example)
dump(repr(rel_example))

abs_example = ri.make_absolute(rel_example)
dump(abs_example)

sys.exit()
"""
def relative_indent(texts):
    """Convert every text to relative-indent form.

    Returns (indenter, converted_texts); the indenter is needed later to
    convert a result back with make_absolute().
    """
    indenter = RelativeIndenter(texts)
    converted = [indenter.make_relative(text) for text in texts]
    return indenter, converted
# Number of blank lines added on each side by line_pad().
line_padding = 100


def line_pad(text):
    """Surround text with line_padding blank lines on each side."""
    pad = "\n" * line_padding
    return f"{pad}{text}{pad}"


def line_unpad(text):
    """Strip the padding added by line_pad().

    Returns None when the padding regions are no longer pure newlines,
    i.e. an edit strayed into them.
    """
    head = text[:line_padding]
    tail = text[-line_padding:]
    if set(head + tail) != {"\n"}:
        return
    return text[line_padding:-line_padding]
def dmp_apply(texts, remap=True):
    """Apply the search->replace edit to original_text via diff-match-patch.

    texts is (search_text, replace_text, original_text).  Builds character
    patches for S->R, optionally remaps their offsets into O coordinates
    (see map_patches), then applies them to O.  Returns the patched text,
    or None when any patch fails to apply.
    """
    debug = False
    # debug = True

    search_text, replace_text, original_text = texts

    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5
    # dmp.Diff_EditCost = 16

    if remap:
        # Offsets will be corrected by map_patches(), so matching can be
        # looser here.
        dmp.Match_Threshold = 0.95
        dmp.Match_Distance = 500
        dmp.Match_MaxBits = 128
        dmp.Patch_Margin = 32
    else:
        dmp.Match_Threshold = 0.5
        dmp.Match_Distance = 100_000
        dmp.Match_MaxBits = 32
        dmp.Patch_Margin = 8

    diff = dmp.diff_main(search_text, replace_text, None)
    dmp.diff_cleanupSemantic(diff)
    dmp.diff_cleanupEfficiency(diff)

    patches = dmp.patch_make(search_text, diff)

    if debug:
        html = dmp.diff_prettyHtml(diff)
        Path("tmp.search_replace_diff.html").write_text(html)

        for d in diff:
            print(d[0], repr(d[1]))

        for patch in patches:
            start1 = patch.start1
            print()
            print(start1, repr(search_text[start1 : start1 + 10]))
            print(start1, repr(replace_text[start1 : start1 + 10]))
            print(patch.diffs)

        # dump(original_text)
        # dump(search_text)

    if remap:
        patches = map_patches(texts, patches, debug)

    # NOTE(review): patches_text is only used by the debug print below.
    patches_text = dmp.patch_toText(patches)

    new_text, success = dmp.patch_apply(patches, original_text)

    all_success = False not in success

    if debug:
        # dump(new_text)
        print(patches_text)
        # print(new_text)
        dump(success)
        dump(all_success)
        # print(new_text)

    if not all_success:
        return

    return new_text
def lines_to_chars(lines, mapping):
    """Expand a placeholder-char string back into real lines.

    Each character in `lines` is a placeholder produced by
    dmp.diff_linesToChars(); `mapping` is indexed by the character's
    codepoint and yields the original line text.
    """
    return "".join(mapping[ord(char)] for char in lines)
def dmp_lines_apply(texts, remap=True):
    """Line-based variant of dmp_apply: diff and patch whole lines.

    texts is (search_text, replace_text, original_text); every text must
    end with a newline.  Lines are encoded as single placeholder chars via
    diff_linesToChars, patched in that space, then decoded.  Returns the
    patched text or None on failure.

    NOTE(review): the `remap` parameter is unused here — presumably kept
    for interface parity with dmp_apply; confirm.
    """
    debug = False
    # debug = True

    for t in texts:
        # line-level diffing assumes a trailing newline on every text
        assert t.endswith("\n"), t

    search_text, replace_text, original_text = texts

    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5
    # dmp.Diff_EditCost = 16
    dmp.Match_Threshold = 0.1
    dmp.Match_Distance = 100_000
    dmp.Match_MaxBits = 32
    dmp.Patch_Margin = 1

    # Encode all three texts together so identical lines share placeholders.
    all_text = search_text + replace_text + original_text
    all_lines, _, mapping = dmp.diff_linesToChars(all_text, "")
    assert len(all_lines) == len(all_text.splitlines())

    search_num = len(search_text.splitlines())
    replace_num = len(replace_text.splitlines())
    original_num = len(original_text.splitlines())

    search_lines = all_lines[:search_num]
    replace_lines = all_lines[search_num : search_num + replace_num]
    original_lines = all_lines[search_num + replace_num :]

    assert len(search_lines) == search_num
    assert len(replace_lines) == replace_num
    assert len(original_lines) == original_num

    diff_lines = dmp.diff_main(search_lines, replace_lines, None)
    dmp.diff_cleanupSemantic(diff_lines)
    dmp.diff_cleanupEfficiency(diff_lines)

    patches = dmp.patch_make(search_lines, diff_lines)

    if debug:
        diff = list(diff_lines)
        dmp.diff_charsToLines(diff, mapping)
        dump(diff)
        html = dmp.diff_prettyHtml(diff)
        Path("tmp.search_replace_diff.html").write_text(html)

        for d in diff:
            print(d[0], repr(d[1]))

    new_lines, success = dmp.patch_apply(patches, original_lines)
    new_text = lines_to_chars(new_lines, mapping)

    all_success = False not in success

    if debug:
        # print(new_text)
        dump(success)
        dump(all_success)
        # print(new_text)

    if not all_success:
        return

    return new_text
def diff_lines(search_text, replace_text):
    """Return a line-level diff of the two texts as a list of lines,
    each prefixed with "-", "+" or " " (udiff style, no headers).
    """
    dmp = diff_match_patch()
    dmp.Diff_Timeout = 5
    # dmp.Diff_EditCost = 16
    search_lines, replace_lines, mapping = dmp.diff_linesToChars(search_text, replace_text)

    # NOTE(review): this local shadows the function's own name.
    diff_lines = dmp.diff_main(search_lines, replace_lines, None)
    dmp.diff_cleanupSemantic(diff_lines)
    dmp.diff_cleanupEfficiency(diff_lines)

    diff = list(diff_lines)
    dmp.diff_charsToLines(diff, mapping)
    # NOTE(review): looks like leftover debug output — confirm before relying on it.
    dump(diff)

    udiff = []
    for d, lines in diff:
        # diff-match-patch ops: -1 delete, +1 insert, 0 equal
        if d < 0:
            d = "-"
        elif d > 0:
            d = "+"
        else:
            d = " "
        for line in lines.splitlines(keepends=True):
            udiff.append(d + line)

    return udiff
def search_and_replace(texts):
    """Apply a literal search/replace.

    texts is (search, replace, original).  Returns the edited original,
    or None when the search string does not occur.
    """
    search, replace, original = texts

    # if original.count(search) > 1:
    #     raise SearchTextNotUnique()

    if search not in original:
        return

    return original.replace(search, replace)
def git_cherry_pick_osr_onto_o(texts):
    """Use git to merge the S->R change onto O.

    Builds an O->S->R commit chain in a throwaway repo, checks out O, then
    cherry-picks R.  Returns the merged file text, or None when git
    reports merge conflicts.
    """
    search_text, replace_text, original_text = texts

    with GitTemporaryDirectory() as dname:
        repo = git.Repo(dname)

        fname = Path(dname) / "file.txt"

        # Make O->S->R
        fname.write_text(original_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "original")
        original_hash = repo.head.commit.hexsha

        fname.write_text(search_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "search")

        fname.write_text(replace_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "replace")
        replace_hash = repo.head.commit.hexsha

        # go back to O
        repo.git.checkout(original_hash)

        # cherry pick R onto original
        try:
            repo.git.cherry_pick(replace_hash, "--minimal")
        except git.exc.GitCommandError:
            # merge conflicts!
            return

        new_text = fname.read_text()
        return new_text
def git_cherry_pick_sr_onto_so(texts):
    """Use git to merge S->R onto an S->O branch.

    Commits S, then R on top of it; rewinds to S and commits O; then
    cherry-picks R onto O.  Returns the merged file text, or None when
    git reports merge conflicts.
    """
    search_text, replace_text, original_text = texts

    with GitTemporaryDirectory() as dname:
        repo = git.Repo(dname)

        fname = Path(dname) / "file.txt"

        fname.write_text(search_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "search")
        search_hash = repo.head.commit.hexsha

        # make search->replace
        fname.write_text(replace_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "replace")
        replace_hash = repo.head.commit.hexsha

        # go back to search,
        repo.git.checkout(search_hash)

        # make search->original
        fname.write_text(original_text)
        repo.git.add(str(fname))
        repo.git.commit("-m", "original")

        # cherry pick replace onto original
        try:
            repo.git.cherry_pick(replace_hash, "--minimal")
        except git.exc.GitCommandError:
            # merge conflicts!
            return

        new_text = fname.read_text()
        return new_text
class SearchTextNotUnique(ValueError):
    """The search text matches more than one location, so the edit is ambiguous."""
# Preprocessing variants tried by the strategies below.  Each tuple is
# (strip_blank_lines, relative_indent, reverse_lines); the reverse_lines
# variants are commented out (disabled).
all_preprocs = [
    # (strip_blank_lines, relative_indent, reverse_lines)
    (False, False, False),
    (True, False, False),
    (False, True, False),
    (True, True, False),
    # (False, False, True),
    # (True, False, True),
    # (False, True, True),
    # (True, True, True),
]

# Variants that always apply relative indentation.
always_relative_indent = [
    (False, True, False),
    (True, True, False),
    # (False, True, True),
    # (True, True, True),
]

# Strategies for the edit-block format: literal replacement first, then a
# git cherry-pick merge, then line-based diff-match-patch.
editblock_strategies = [
    (search_and_replace, all_preprocs),
    (git_cherry_pick_osr_onto_o, all_preprocs),
    (dmp_lines_apply, all_preprocs),
]

# NOTE(review): 2-tuples, unlike the 3-tuple preprocs above; appears unused
# in this file — confirm before relying on it.
never_relative = [
    (False, False),
    (True, False),
]

# Strategies for the unified-diff format.
# NOTE(review): currently identical to editblock_strategies.
udiff_strategies = [
    (search_and_replace, all_preprocs),
    (git_cherry_pick_osr_onto_o, all_preprocs),
    (dmp_lines_apply, all_preprocs),
]
def flexible_search_and_replace(texts, strategies):
    """Try a series of search/replace methods, starting from the most
    literal interpretation of search_text.  If needed, progress to more
    flexible methods, which can accommodate divergence between
    search_text and original_text and yet still achieve the desired
    edits.  Returns the first successful result, or None.
    """
    for strategy, preproc_variants in strategies:
        for preproc in preproc_variants:
            result = try_strategy(texts, strategy, preproc)
            if result:
                dump(strategy, preproc)
                return result
def reverse_lines(text):
    """Return text with its lines in reverse order (line endings kept)."""
    return "".join(reversed(text.splitlines(keepends=True)))
def try_strategy(texts, strategy, preproc):
    """Run one strategy with the given preprocessing flags, then undo the
    preprocessing on its result.

    preproc is (strip_blank_lines, relative_indent, reverse_lines).
    Returns the strategy's (post-processed) result, or None on failure.
    """
    strip_blank, use_rel_indent, use_reverse = preproc
    indenter = None

    if strip_blank:
        texts = strip_blank_lines(texts)
    if use_rel_indent:
        indenter, texts = relative_indent(texts)
    if use_reverse:
        texts = [reverse_lines(text) for text in texts]

    result = strategy(texts)

    if result and use_reverse:
        result = reverse_lines(result)

    if result and use_rel_indent:
        try:
            result = indenter.make_absolute(result)
        except ValueError:
            # marker characters survived -> the edit corrupted the encoding
            return

    return result
def strip_blank_lines(texts):
    """Strip leading and trailing blank lines from each text, ensuring a
    single trailing newline."""
    stripped = []
    for text in texts:
        stripped.append(text.strip("\n") + "\n")
    return stripped
def read_text(fname):
    """Return the full text contents of fname (a str or Path)."""
    return Path(fname).read_text()
def proc(dname):
    """Run every configured strategy/preproc combo on one test directory.

    The directory must contain "search", "replace" and "original" files
    (and a "correct" file to grade against).  Writes each attempt's output
    to original.<method>.  Returns a list of (method, "pass"/"WRONG"/"fail")
    tuples, or None when the input files are missing.
    """
    dname = Path(dname)

    try:
        search_text = read_text(dname / "search")
        replace_text = read_text(dname / "replace")
        original_text = read_text(dname / "original")
    except FileNotFoundError:
        return

    ####

    texts = search_text, replace_text, original_text

    # Only the line-based dmp strategy is currently enabled.
    strategies = [
        # (search_and_replace, all_preprocs),
        # (git_cherry_pick_osr_onto_o, all_preprocs),
        # (git_cherry_pick_sr_onto_so, all_preprocs),
        # (dmp_apply, all_preprocs),
        (dmp_lines_apply, all_preprocs),
    ]

    _strategies = editblock_strategies  # noqa: F841

    # Abbreviations used to build the per-attempt method label.
    short_names = dict(
        search_and_replace="sr",
        git_cherry_pick_osr_onto_o="cp_o",
        git_cherry_pick_sr_onto_so="cp_so",
        dmp_apply="dmp",
        dmp_lines_apply="dmpl",
    )

    patched = dict()
    for strategy, preprocs in strategies:
        for preproc in preprocs:
            method = strategy.__name__
            method = short_names[method]

            # Suffix the label with the preproc flags: s=strip blanks,
            # i=relative indent, r=reversed lines.
            strip_blank, rel_indent, rev_lines = preproc
            if strip_blank or rel_indent:
                method += "_"
            if strip_blank:
                method += "s"
            if rel_indent:
                method += "i"
            if rev_lines:
                method += "r"

            res = try_strategy(texts, strategy, preproc)
            patched[method] = res

    results = []
    for method, res in patched.items():
        out_fname = dname / f"original.{method}"
        if out_fname.exists():
            out_fname.unlink()

        if res:
            out_fname.write_text(res)

            correct = (dname / "correct").read_text()
            if res == correct:
                res = "pass"
            else:
                res = "WRONG"
        else:
            res = "fail"

        results.append((method, res))

    return results
def colorize_result(result):
    """Wrap the known result strings in ANSI colors; pass others through."""
    palette = {
        "pass": "\033[102;30mpass\033[0m",  # Green background, black text
        "WRONG": "\033[101;30mWRONG\033[0m",  # Red background, black text
        "fail": "\033[103;30mfail\033[0m",  # Yellow background, black text
    }
    return palette.get(result, result)
def main(dnames):
    """Run proc() over each test directory and print a colorized 2D table
    of results (directories x methods), sorted by pass count.

    dnames: list of directory path strings (from sys.argv).
    Returns None (so sys.exit(main(...)) exits 0).
    """
    all_results = []
    for dname in tqdm(dnames):
        dname = Path(dname)
        results = proc(dname)
        # BUGFIX: proc() returns None when a directory is missing its input
        # files; iterating None raised TypeError.  Skip such directories.
        for method, res in results or []:
            all_results.append((dname, method, res))
            # print(dname, method, colorize_result(res))

    # Create a 2D table with directories along the right and methods along the top
    # Collect all unique methods (in first-seen order) and directories
    methods = []
    for _, method, _ in all_results:
        if method not in methods:
            methods.append(method)

    directories = dnames

    # Sort directories by decreasing number of 'pass' results
    pass_counts = {
        dname: sum(
            res == "pass" for dname_result, _, res in all_results if str(dname) == str(dname_result)
        )
        for dname in directories
    }
    directories.sort(key=lambda dname: pass_counts[dname], reverse=True)

    # Create a results matrix
    # NOTE(review): keys are the original dname strings here, but populated
    # via str(Path) below — this agrees only when the argv strings have no
    # trailing slashes etc.; confirm.
    results_matrix = {dname: {method: "" for method in methods} for dname in directories}

    # Populate the results matrix
    for dname, method, res in all_results:
        results_matrix[str(dname)][method] = res

    # Print the header
    print("{:<20}".format("Directory"), end="")
    for method in methods:
        print("{:<9}".format(method), end="")
    print()

    # Print the rows with colorized results
    for dname in directories:
        print("{:<20}".format(Path(dname).name), end="")
        for method in methods:
            res = results_matrix[dname][method]
            colorized_res = colorize_result(res)
            # widen the field by the length of the invisible ANSI codes
            res_l = 9 + len(colorized_res) - len(res)
            fmt = "{:<" + str(res_l) + "}"
            print(fmt.format(colorized_res), end="")
        print()
if __name__ == "__main__":
    # Test-directory paths are passed as command-line arguments.
    # main() returns None, so this exits with status 0.
    status = main(sys.argv[1:])
    sys.exit(status)

395
aider/coders/udiff_coder.py Normal file
View file

@@ -0,0 +1,395 @@
import difflib
from itertools import groupby
from pathlib import Path
from ..dump import dump # noqa: F401
from .base_coder import Coder
from .search_replace import (
SearchTextNotUnique,
all_preprocs,
diff_lines,
flexible_search_and_replace,
search_and_replace,
)
from .udiff_prompts import UnifiedDiffPrompts
# Error text sent back to the LLM when a hunk's "before" lines cannot be
# found in the target file.  Filled via str.format(path=, original=, num_lines=).
no_match_error = """UnifiedDiffNoMatch: hunk failed to apply!
{path} does not contain lines that match the diff you provided!
Try again.
DO NOT skip blank lines, comments, docstrings, etc!
The diff needs to apply cleanly to the lines in {path}!
{path} does not contain these {num_lines} exact lines in a row:
```
{original}```
"""

# Error text sent back to the LLM when a hunk's "before" lines occur more
# than once in the target file.  Same format placeholders as no_match_error.
not_unique_error = """UnifiedDiffNotUnique: hunk failed to apply!
{path} contains multiple sets of lines that match the diff you provided!
Try again.
Use additional ` ` lines to provide context that uniquely indicates which code needs to be changed.
The diff needs to apply to a unique set of lines in {path}!
{path} contains multiple copies of these {num_lines} lines:
```
{original}```
"""
class UnifiedDiffCoder(Coder):
    """Coder that expects the LLM to emit edits as unified diffs inside
    fenced ```diff blocks, rather than search/replace blocks."""

    edit_format = "udiff"

    def __init__(self, *args, **kwargs):
        self.gpt_prompts = UnifiedDiffPrompts()
        super().__init__(*args, **kwargs)

    def get_edits(self):
        """Parse (path, hunk) edits out of the model's response so far.

        Hunks without their own ---/+++ header inherit the most recently
        seen path.
        """
        content = self.partial_response_content

        # might raise ValueError for malformed ORIG/UPD blocks
        raw_edits = list(find_diffs(content))

        last_path = None
        edits = []
        for path, hunk in raw_edits:
            if path:
                last_path = path
            else:
                path = last_path
            edits.append((path, hunk))

        return edits

    def apply_edits(self, edits):
        """Apply each unique hunk to its file on disk.

        Normalizes and de-duplicates hunks first; collects per-hunk errors
        (no match / not unique) and raises a single ValueError with all of
        them at the end, after applying whatever did succeed.
        """
        seen = set()
        uniq = []
        for path, hunk in edits:
            hunk = normalize_hunk(hunk)
            if not hunk:
                continue

            # de-dup on the (path, hunk) text
            this = [path + "\n"] + hunk
            this = "".join(this)

            if this in seen:
                continue
            seen.add(this)

            uniq.append((path, hunk))

        errors = []
        for path, hunk in uniq:
            full_path = self.abs_root_path(path)
            content = self.io.read_text(full_path)

            original, _ = hunk_to_before_after(hunk)

            try:
                content = do_replace(full_path, content, hunk)
            except SearchTextNotUnique:
                errors.append(
                    not_unique_error.format(
                        path=path, original=original, num_lines=len(original.splitlines())
                    )
                )
                continue

            if not content:
                errors.append(
                    no_match_error.format(
                        path=path, original=original, num_lines=len(original.splitlines())
                    )
                )
                continue

            # SUCCESS!
            self.io.write_text(full_path, content)

        if errors:
            errors = "\n\n".join(errors)
            raise ValueError(errors)
def do_replace(fname, content, hunk):
    """Apply one hunk to content (the current text of fname).

    Creates the file when the hunk has no "before" lines and the file does
    not exist.  Returns the new content, or None on failure.
    """
    fname = Path(fname)

    before_text, after_text = hunk_to_before_after(hunk)

    # does it want to make a new file?
    if not fname.exists() and not before_text.strip():
        fname.touch()
        content = ""

    if content is None:
        return

    # TODO: handle inserting into new file
    if not before_text.strip():
        # append to existing file, or start a new file
        new_content = content + after_text
        return new_content

    new_content = None

    new_content = apply_hunk(content, hunk)
    if new_content:
        return new_content
def collapse_repeats(s):
    """Collapse runs of identical adjacent characters into one ("aabb" -> "ab")."""
    out = []
    prev = None
    for ch in s:
        if ch != prev:
            out.append(ch)
            prev = ch
    return "".join(out)
def apply_hunk(content, hunk):
    """Apply a hunk to content, trying direct application first and then
    splitting the hunk into per-change sections with their surrounding
    context.  Returns the new content, or None when any section fails.
    """
    # NOTE(review): before_text/after_text are unused after this point — confirm.
    before_text, after_text = hunk_to_before_after(hunk)

    res = directly_apply_hunk(content, hunk)
    if res:
        return res

    hunk = make_new_lines_explicit(content, hunk)

    # just consider space vs not-space: "x" marks changed lines,
    # " " marks context (a bare "\n" line counts as context too)
    ops = "".join([line[0] for line in hunk])
    ops = ops.replace("-", "x")
    ops = ops.replace("+", "x")
    ops = ops.replace("\n", " ")

    # Split the hunk into alternating context/change sections.
    cur_op = " "
    section = []
    sections = []

    for i in range(len(ops)):
        op = ops[i]
        if op != cur_op:
            sections.append(section)
            section = []
            cur_op = op

        section.append(hunk[i])

    sections.append(section)
    if cur_op != " ":
        # ensure the list ends with a (possibly empty) context section
        sections.append([])

    all_done = True
    # sections alternate context, change, context, change, ...;
    # each change at odd index i-1 sits between contexts i-2 and i.
    for i in range(2, len(sections), 2):
        preceding_context = sections[i - 2]
        changes = sections[i - 1]
        following_context = sections[i]

        res = apply_partial_hunk(content, preceding_context, changes, following_context)
        if res:
            content = res
        else:
            all_done = False
            # FAILED!
            # this_hunk = preceding_context + changes + following_context
            break

    if all_done:
        return content
def flexi_just_search_and_replace(texts):
    """Run flexible_search_and_replace using only the plain literal
    search-and-replace strategy (with all preprocessing variants)."""
    return flexible_search_and_replace(texts, [(search_and_replace, all_preprocs)])
def make_new_lines_explicit(content, hunk):
    """Rewrite the hunk so its "before" side only contains lines that
    actually exist in content.

    Diffs the hunk's before-text against the file content, drops the lines
    content doesn't have, and re-diffs against the after-text.  Falls back
    to the original hunk when too much context would be lost.
    """
    before, after = hunk_to_before_after(hunk)

    diff = diff_lines(before, content)

    back_diff = []
    for line in diff:
        if line[0] == "+":
            continue
        # if line[0] == "-":
        #     line = "+" + line[1:]

        back_diff.append(line)

    new_before = directly_apply_hunk(before, back_diff)
    if not new_before:
        return hunk

    if len(new_before.strip()) < 10:
        # too little context left to anchor on
        return hunk

    before = before.splitlines(keepends=True)
    new_before = new_before.splitlines(keepends=True)
    after = after.splitlines(keepends=True)

    if len(new_before) < len(before) * 0.66:
        # lost more than a third of the before-lines; keep the original
        return hunk

    new_hunk = difflib.unified_diff(new_before, after, n=max(len(new_before), len(after)))
    new_hunk = list(new_hunk)[3:]  # drop the ---/+++/@@ header lines

    return new_hunk
def cleanup_pure_whitespace_lines(lines):
    """Reduce whitespace-only lines to just their line terminator.

    Lines with any non-whitespace content are returned unchanged.
    """
    res = [
        # BUGFIX: this must be a slice, not a single-character index —
        # it keeps the trailing "\r\n"/"\n" while dropping the blanks.
        line if line.strip() else line[-(len(line) - len(line.rstrip("\r\n"))):]
        for line in lines
    ]
    return res
def normalize_hunk(hunk):
    """Canonicalize a hunk by cleaning whitespace-only lines on both sides
    and re-diffing before vs after.

    Returns the new hunk lines without the ---/+++/@@ header; an empty
    list when before and after are identical.
    """
    before, after = hunk_to_before_after(hunk, lines=True)

    before = cleanup_pure_whitespace_lines(before)
    after = cleanup_pure_whitespace_lines(after)

    # n large enough that the whole hunk stays in one block of context
    diff = difflib.unified_diff(before, after, n=max(len(before), len(after)))
    diff = list(diff)[3:]  # drop the ---/+++/@@ header lines
    return diff
def directly_apply_hunk(content, hunk):
    """Attempt to apply a hunk as a straight search/replace on content.

    Returns the new content, or None when it can't be applied safely.
    """
    before, after = hunk_to_before_after(hunk)

    before_lines, _ = hunk_to_before_after(hunk, lines=True)
    before_lines = "".join([line.strip() for line in before_lines])

    # Refuse to do a repeated search and replace on a tiny bit of non-whitespace context
    if len(before_lines) < 10 and content.count(before) > 1:
        return

    try:
        new_content = flexi_just_search_and_replace([before, after, content])
    except SearchTextNotUnique:
        new_content = None

    return new_content
def apply_partial_hunk(content, preceding_context, changes, following_context):
    """Apply one change section, retrying with progressively less of its
    surrounding context until something applies.

    For each total amount of context (largest first), prefers keeping
    preceding context over following context.  Returns the new content,
    or None when no combination applies.
    """
    len_prec = len(preceding_context)
    len_foll = len(following_context)
    total = len_prec + len_foll

    for use in range(total, 0, -1):
        for use_prec in range(min(use, len_prec), -1, -1):
            use_foll = use - use_prec
            if use_foll > len_foll:
                continue

            prec = preceding_context[-use_prec:] if use_prec else []
            foll = following_context[:use_foll]

            res = directly_apply_hunk(content, prec + changes + foll)
            if res:
                return res
def find_diffs(content):
    """Scan content for ```diff fenced blocks and parse their hunks.

    Returns a list of (fname, hunk) tuples; fname may be None for hunks
    without an explicit file header.
    """
    # We can always use triple-quotes, because all the udiff content
    # is prefixed with +/-/space.
    if not content.endswith("\n"):
        content += "\n"

    lines = content.splitlines(keepends=True)

    edits = []
    line_num = 0
    while line_num < len(lines):
        if lines[line_num].startswith("```diff"):
            line_num, these_edits = process_fenced_block(lines, line_num + 1)
            edits.extend(these_edits)
        else:
            line_num += 1

    # For now, just take 1!
    # edits = edits[:1]

    return edits
def process_fenced_block(lines, start_line_num):
    """Parse the contents of one ```diff fenced block into hunks.

    lines: the full text split with keepends; start_line_num: index of the
    first line inside the fence.  Returns (next_line_num, edits) where
    edits is a list of (fname, hunk_lines); fname is None when the block
    has no "--- old / +++ new" header pair.
    """
    # BUGFIX: initialize line_num so an empty scan range (fence at EOF)
    # doesn't leave it unbound.
    line_num = start_line_num
    for line_num in range(start_line_num, len(lines)):
        line = lines[line_num]
        if line.startswith("```"):
            break

    block = lines[start_line_num:line_num]
    # Sentinel @@ line flushes the final hunk in the loop below.
    block.append("@@ @@")

    if block and block[0].startswith("--- "):
        # "--- old" / "+++ new" header pair; keep the old-file name.
        fname = block[0].split()[1]
        block = block[2:]
    else:
        fname = None

    edits = []

    keeper = False
    hunk = []
    op = " "
    for line in block:
        hunk.append(line)
        if len(line) < 2:
            continue
        op = line[0]
        if op in "-+":
            keeper = True
            continue
        if op != "@":
            continue
        if not keeper:
            # context-only hunk: discard it
            hunk = []
            continue

        hunk = hunk[:-1]  # drop the trailing @@ line
        edits.append((fname, hunk))
        hunk = []
        # BUGFIX: reset keeper, otherwise a later context-only hunk in the
        # same block would be wrongly emitted as an edit.
        keeper = False

    return line_num + 1, edits
def hunk_to_before_after(hunk, lines=False):
    """Split a hunk's "+/-/ " prefixed lines into before and after texts.

    Context lines go to both sides, "-" lines to before only, "+" lines to
    after only.  Lines shorter than two characters count as context and
    are kept verbatim.  Returns two strings, or two lists of lines when
    lines=True.
    """
    before = []
    after = []
    for raw in hunk:
        if len(raw) < 2:
            marker, text = " ", raw
        else:
            marker, text = raw[0], raw[1:]

        if marker == " ":
            before.append(text)
            after.append(text)
        elif marker == "-":
            before.append(text)
        elif marker == "+":
            after.append(text)

    if lines:
        return before, after

    return "".join(before), "".join(after)

View file

@@ -0,0 +1,105 @@
# flake8: noqa: E501
from .base_prompts import CoderPrompts
class UnifiedDiffPrompts(CoderPrompts):
main_system = """Act as an expert software developer.
You are diligent and tireless, and you always COMPLETELY IMPLEMENT the needed code.
Always use best practices when coding.
Respect and use existing conventions, libraries, etc that are already present in the code base.
Take requests for changes to the supplied code.
If the request is ambiguous, ask questions.
For each file that needs to be changed, write out the changes similar to a unified diff like `diff -U0` would produce. For example:
# Example conversation 1
## USER: Replace is_prime with a call to sympy.
## ASSISTANT: Ok, I will:
1. Add an imports of sympy.
2. Remove the is_prime() function.
3. Replace the existing call to is_prime() with a call to sympy.isprime().
Here are the diffs for those changes:
```diff
--- mathweb/flask/app.py
+++ mathweb/flask/app.py
@@ ... @@
-class MathWeb:
+import sympy
+
+class MathWeb:
@@ ... @@
-def is_prime(x):
- if x < 2:
- return False
- for i in range(2, int(math.sqrt(x)) + 1):
- if x % i == 0:
- return False
- return True
@@ ... @@
-@app.route('/prime/<int:n>')
-def nth_prime(n):
- count = 0
- num = 1
- while count < n:
- num += 1
- if is_prime(num):
- count += 1
- return str(num)
+@app.route('/prime/<int:n>')
+def nth_prime(n):
+ count = 0
+ num = 1
+ while count < n:
+ num += 1
+ if sympy.isprime(num):
+ count += 1
+ return str(num)
```
"""
system_reminder = """# File editing rules:
Return edits similar to unified diffs that `diff -U0` would produce.
Make sure you include the first 2 lines with the file paths.
Don't include timestamps with the file paths.
Start each hunk of changes with a `@@ ... @@` line.
Don't include line numbers like `diff -U0` does.
The user's patch tool doesn't need them.
The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file!
Think carefully and make sure you include and mark all lines that need to be removed or changed as `-` lines.
Make sure you mark all new or modified lines with `+`.
Don't leave out any lines or the diff patch won't apply correctly.
Indentation matters in the diffs!
Start a new hunk for each section of the file that needs changes.
Only output hunks that specify changes with `+` or `-` lines.
Skip any hunks that are entirely unchanging ` ` lines.
Output hunks in whatever order makes the most sense.
Hunks don't need to be in any particular order.
When editing a function, method, loop, etc use a hunk to replace the *entire* code block.
Delete the entire existing version with `-` lines and then add a new, updated version with `+` lines.
This will help you generate correct code and correct diffs.
"""
files_content_prefix = "These are the *read-write* files:\n"
files_no_full_files = "I am not sharing any *read-write* files yet."
repo_content_prefix = """Below here are summaries of other files present in this git repository.
Do not propose changes to these files, they are *read-only*.
To make a file *read-write*, ask the user to *add it to the chat*.
"""