import math
from difflib import SequenceMatcher
from typing import Optional


def replace_most_similar_chunk(whole: str, part: str, replace: str) -> Optional[str]:
    """Replace the run of lines in *whole* that best matches *part*.

    Every window of consecutive lines of *whole* whose line count is within
    roughly 10% of the line count of *part* is scored against *part* with
    ``difflib.SequenceMatcher.ratio()``. The highest-scoring window is
    swapped for the lines of *replace*; on ties the first window examined
    (shortest length, then earliest position) wins.

    Args:
        whole: Full text to search, e.g. the contents of a file.
        part: Approximate text of the chunk to locate inside *whole*.
        replace: Text whose lines are substituted for the located chunk.

    Returns:
        The modified text with lines joined by ``"\\n"`` (no trailing
        newline is appended), or ``None`` when *part* has no lines or no
        window reaches the 0.8 similarity threshold.
    """
    similarity_thresh = 0.8
    scale = 0.1

    whole_lines = whole.splitlines()
    part_lines = part.splitlines()
    if not part_lines:
        # Nothing to look for; an empty target cannot identify a chunk.
        return None

    # Compare against the newline-joined lines rather than the raw `part`:
    # candidate chunks are built the same way, so a trailing newline in
    # `part` must not make an exact match score below a longer window.
    target = "\n".join(part_lines)

    # Window sizes, in lines, allowed on either side of len(part_lines).
    min_len = max(1, math.floor(len(part_lines) * (1 - scale)))
    max_len = math.ceil(len(part_lines) * (1 + scale))

    # SequenceMatcher caches its analysis of the second sequence, so set the
    # fixed target once and only swap the first sequence per candidate.
    matcher = SequenceMatcher(None)
    matcher.set_seq2(target)

    max_similarity = 0.0
    best_start = -1
    best_end = -1

    for length in range(min_len, max_len + 1):
        # A window of `length` lines starting at `start` covers
        # whole_lines[start:start + length]; it never runs past the end.
        for start in range(len(whole_lines) - length + 1):
            end = start + length
            matcher.set_seq1("\n".join(whole_lines[start:end]))
            similarity = matcher.ratio()

            if similarity > max_similarity:
                max_similarity = similarity
                best_start = start
                best_end = end

    if max_similarity < similarity_thresh:
        return None

    modified_lines = (
        whole_lines[:best_start] + replace.splitlines() + whole_lines[best_end:]
    )
    return "\n".join(modified_lines)