# aider/aider/utils.py

import re
import math

from difflib import SequenceMatcher
from pathlib import Path

# from aider.dump import dump


def replace_most_similar_chunk(whole, part, replace):
    """Replace `part` inside `whole` with `replace`, tolerating small mismatches.

    If `part` occurs verbatim in `whole`, every occurrence is replaced.
    Otherwise both texts are split into lines, and windows of consecutive
    lines of `whole` (with a line count close to that of `part`) are scored
    against `part` using difflib's SequenceMatcher. The best-scoring window
    is swapped for the lines of `replace`.

    Returns the modified text, or None when no window reaches the 0.8
    similarity threshold. On the fuzzy path the result is re-joined with
    "\n" and ends with a newline only if `whole` did.
    """
    if part in whole:
        return whole.replace(part, replace)

    similarity_thresh = 0.8

    max_similarity = 0
    most_similar_chunk_start = -1
    most_similar_chunk_end = -1

    whole_lines = whole.splitlines()
    part_lines = part.splitlines()

    # Candidate window sizes: roughly within 10% of the number of lines in `part`.
    scale = 0.1
    min_len = math.floor(len(part_lines) * (1 - scale))
    max_len = math.ceil(len(part_lines) * (1 + scale))

    # SequenceMatcher caches its analysis of the second sequence, so `part` is
    # set once here and only the first sequence is swapped inside the loop.
    matcher = SequenceMatcher(None, "", part)

    # NOTE(review): range() excludes max_len itself — confirm that is intended.
    for length in range(min_len, max_len):
        for i in range(len(whole_lines) - length + 1):
            chunk = "\n".join(whole_lines[i : i + length])

            matcher.set_seq1(chunk)
            similarity = matcher.ratio()

            # Strict comparison: on ties the earliest window found is kept.
            if similarity > max_similarity:
                max_similarity = similarity
                most_similar_chunk_start = i
                most_similar_chunk_end = i + length

    if max_similarity < similarity_thresh:
        return None

    replace_lines = replace.splitlines()
    modified_whole = (
        whole_lines[:most_similar_chunk_start]
        + replace_lines
        + whole_lines[most_similar_chunk_end:]
    )
    modified_whole = "\n".join(modified_whole)

    # splitlines() dropped the final newline; restore it if the input had one.
    if whole.endswith("\n"):
        modified_whole += "\n"

    return modified_whole


def quoted_file(fname, display_fname):
    """Return the text of the file `fname` wrapped in a triple-backtick fence.

    The result is a newline, `display_fname` on its own line, an opening
    fence line, the file contents, and a closing fence line.
    """
    fence = "\n```\n"
    heading = "\n" + display_fname + fence
    contents = Path(fname).read_text()
    return heading + contents + fence


def strip_quoted_wrapping(res, fname=None):
    """
    Given an input string which may have extra "wrapping" around it, remove the wrapping.
    For example:

    filename.ext
    ```
    We just want this content
    Not the filename and triple quotes
    ```

    The first line is dropped when `fname` is given and that line ends with
    the base name of `fname`. After that, the first and last lines are dropped
    when both start with a triple backtick.

    Returns the remaining text, newline-terminated unless it is empty. A falsy
    `res` (None or "") is returned unchanged.
    """
    if not res:
        return res

    lines = res.splitlines()

    if fname and lines[0].strip().endswith(Path(fname).name):
        lines = lines[1:]

    # `lines` is empty when the input was nothing but the filename line;
    # indexing it unguarded would raise IndexError.
    if lines and lines[0].startswith("```") and lines[-1].startswith("```"):
        lines = lines[1:-1]

    res = "\n".join(lines)
    if res and res[-1] != "\n":
        res += "\n"

    return res


def do_replace(fname, before_text, after_text):
    """Apply one before/after edit to the file `fname`.

    Both texts are first passed through strip_quoted_wrapping. If the cleaned
    `before_text` is blank, `after_text` is appended to the file (the file is
    created first when it does not exist). Otherwise the chunk of the file
    most similar to `before_text` is replaced with `after_text`.

    Returns True once the file has been written, or None when `before_text`
    could not be located in the file (nothing is written in that case).
    """
    before_text = strip_quoted_wrapping(before_text, fname)
    after_text = strip_quoted_wrapping(after_text, fname)
    fname = Path(fname)

    # does it want to make a new file?
    if not fname.exists() and not before_text.strip():
        fname.touch()

    content = fname.read_text()

    if not before_text.strip():
        # Nothing to search for: append, which also covers populating an empty file.
        new_content = content + after_text
    else:
        new_content = replace_most_similar_chunk(content, before_text, after_text)
        # Only None means "no match"; an empty string is a valid result
        # (the edit removed all content) and must still be written.
        if new_content is None:
            return None

    fname.write_text(new_content)
    return True


def show_messages(messages, title):
    """Print `title` as a banner, then each message line prefixed by its role.

    Each message is a mapping with "role" and "content" entries. Every
    message is preceded by a blank line and a dashed rule, and each line of
    its content is printed after the upper-cased role.
    """
    banner = "*" * 50
    rule = "-" * 50

    print(title.upper(), banner)

    for message in messages:
        print()
        print(rule)
        speaker = message["role"].upper()
        for text in message["content"].splitlines():
            print(speaker, text)


# Regex for one complete edit block: an optional code-fence line, the file
# path on its own line, another optional code-fence line, then the ORIGINAL
# and UPDATED sections.
# Capture groups: 1 = fence before the path, 2 = file path,
# 3 = fence after the path, 4 = ORIGINAL text, 5 = UPDATED text.
# NOTE(review): nothing in the visible part of this file uses `pattern` —
# confirm whether it is still needed.
pattern = re.compile(
    # Optional: a code-fence line (e.g., ```python) and any following whitespace
    r"(^```\S*\s*)?"
    # The file path: a run of non-whitespace characters at the start of a line
    r"^(\S+)\s*"
    # Optional: another code-fence line (e.g., ```) and any following whitespace
    r"(^```\S*\s*)?"
    # Start of the ORIGINAL section; lazily captures its content
    r"^<<<<<<< ORIGINAL\n(.*?\n?)"
    # Separator between the ORIGINAL and UPDATED sections; lazily captures the UPDATED content
    r"^=======\n(.*?)"
    # End of the UPDATED section
    r"^>>>>>>> UPDATED",
    # MULTILINE lets ^ match at the start of every line; DOTALL lets . match newlines
    re.MULTILINE | re.DOTALL,
)

# Marker lines that delimit an edit block.
ORIGINAL = "<<<<<<< ORIGINAL"
DIVIDER = "======="
UPDATED = ">>>>>>> UPDATED"

separators = "|".join([ORIGINAL, DIVIDER, UPDATED])

# Matches one whole marker line (optional trailing spaces plus its newline).
# The outer group is capturing so that re.split() keeps the markers in its result.
split_re = re.compile(r"^((?:" + separators + r")[ ]*\n)", re.MULTILINE | re.DOTALL)


def find_original_update_blocks(content):
    """Yield the edit blocks found in `content`.

    An edit block is a filename line, optionally followed by a code-fence
    line, and then:

        <<<<<<< ORIGINAL
        ...original text...
        =======
        ...updated text...
        >>>>>>> UPDATED

    Yields:
        (filename, original_text, updated_text) tuples, in document order.

    Raises:
        ValueError: if a marker is missing, out of place, or not preceded by a
            filename. The message contains the text processed so far followed
            by a "^^^" line describing the problem.
    """
    # split_re only recognises a marker that is followed by a newline, so make
    # sure a marker on the very last line is not missed.
    if not content.endswith("\n"):
        content = content + "\n"

    pieces = re.split(split_re, content)

    # Reverse so that pop() hands the pieces out in document order.
    pieces.reverse()
    processed = []

    try:
        while pieces:
            cur = pieces.pop()
            processed.append(cur)

            # Marker pieces carry their trailing newline, so compare stripped.
            marker = cur.strip()
            if marker in (DIVIDER, UPDATED):
                raise ValueError(f"Unexpected {marker}")

            if marker != ORIGINAL:
                continue

            # The filename is the last line above the marker, or the one
            # before it when the last line is blank or a code fence.
            filename = ""
            for candidate in reversed(processed[-2].splitlines()[-2:]):
                candidate = candidate.strip()
                if candidate and "`" not in candidate:
                    filename = candidate
                    break
            if not filename:
                raise ValueError(f"Bad/missing filename. It should go right above {ORIGINAL}")

            original_text = pieces.pop()
            processed.append(original_text)

            divider_marker = pieces.pop()
            processed.append(divider_marker)
            if divider_marker.strip() != DIVIDER:
                raise ValueError(f"Expected {DIVIDER}")

            updated_text = pieces.pop()
            processed.append(updated_text)

            updated_marker = pieces.pop()
            processed.append(updated_marker)
            if updated_marker.strip() != UPDATED:
                raise ValueError(f"Expected {UPDATED}")

            yield filename, original_text, updated_text
    except ValueError as err:
        seen = "".join(processed)
        raise ValueError(f"{seen}\n^^^ {err.args[0]}") from err
    except IndexError as err:
        # pop() ran out of pieces in the middle of a block.
        seen = "".join(processed)
        raise ValueError(f"{seen}\n^^^ Incomplete ORIGINAL/UPDATED block.") from err
    except Exception as err:
        seen = "".join(processed)
        raise ValueError(f"{seen}\n^^^ Error parsing ORIGINAL/UPDATED block.") from err


if __name__ == "__main__":
    # Manual smoke test: parse a sample chat reply that contains a single edit
    # block and print the extracted (filename, original, updated) tuples.
    sample_reply = """
Here's the change:

```text
foo.txt
<<<<<<< ORIGINAL
Two
=======
Tooooo
>>>>>>> UPDATED
```

Hope you like it!
"""
    parsed_blocks = list(find_original_update_blocks(sample_reply))
    print(parsed_blocks)