From 4dee2ab2f4fa421f9a9c8233b902e9a4dfdf5639 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 31 Dec 2023 09:40:08 -0800 Subject: [PATCH] aider: Fix parsing of file paths with spaces in udiff_coder.py. # Aider chat conversation: USER: I just tried aider on a project where the folder names have spaces, and it has an issue parsing the diffs. For example: ```diff --- PRAW and URS Reddit Scraping/Classify.py +++ PRAW and URS Reddit Scraping/Classify.py @@ ... @@ def classify(filename:str, model: str = 'gpt-3.5-turbo-1106'): d = json.load(open(filename)) for post in d['fetched_posts']: messages = copy(MESSAGE_TEMPLATE_CLASSIFY) ... ``` Gives: ```Allow creation of new file PRAW? y``` Which is not correct, a new file is not being created here. ASSISTANT: To address the issue with folder names containing spaces, we need to update the `find_diffs` function in `udiff_coder.py` to correctly parse file paths that include spaces. The current implementation seems to split the file path on spaces, which leads to incorrect behavior when folder or file names contain spaces. Here's the diff with the necessary changes: ```diff --- aider/coders/udiff_coder.py +++ aider/coders/udiff_coder.py @@ ... @@ def find_diffs(content): # We can always use triple-quotes, because all the udiff content # is prefixed with +/-/space. if not content.endswith("\n"): content = content + "\n" lines = content.splitlines(keepends=True) line_num = 0 edits = [] while line_num < len(lines): while line_num < len(lines): line = lines[line_num] if line.startswith("```diff"): line_num, these_edits = process_fenced_block(lines, line_num + 1) edits += these_edits break line_num += 1 # For now, just take 1! # edits = edits[:1] return edits def process_fenced_block(lines, start_line_num): for line_num in range(start_line_num, len(lines)): line = lines[line_num] if line.startswith("```"): break block = lines[start_line_num:line_num] block.append("@@ @@") - if block[1].startswith("+++ "): - fname = block[1].split()[1] + if block[1].startswith("--- ") and block[2].startswith("+++ "): + # Extract the file path, considering that it might contain spaces + fname = block[1][4:].strip() + # Ensure that the file path is the same for both the original and new file + assert fname == block[2][4:].strip(), "File paths in diff headers do not match" block = block[2:] else: fname = None edits = [] keeper = False hunk = [] op = " " for line in block: hunk.append(line) if len(line) < 2: continue op = line[0] if op in "-+": keeper = True continue if op != "@": continue if not keeper: hunk = [] continue hunk = hunk[:-1] edits.append((fname, hunk)) hunk = [] return line_num + 1, edits @@ ... @@ ``` This change ensures that the file path is correctly extracted from the diff headers, even if it contains spaces. It also adds an assertion to check that the file paths for the original and new files match, which is a common requirement in diffs. --- aider/coders/udiff_coder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/aider/coders/udiff_coder.py b/aider/coders/udiff_coder.py index c5fe7bb6b..05583a08d 100644 --- a/aider/coders/udiff_coder.py +++ b/aider/coders/udiff_coder.py @@ -334,8 +334,11 @@ def process_fenced_block(lines, start_line_num): block = lines[start_line_num:line_num] block.append("@@ @@") - if block[1].startswith("+++ "): - fname = block[1].split()[1] + if block[1].startswith("--- ") and block[2].startswith("+++ "): + # Extract the file path, considering that it might contain spaces + fname = block[1][4:].strip() + # Ensure that the file path is the same for both the original and new file + assert fname == block[2][4:].strip(), "File paths in diff headers do not match" block = block[2:] else: fname = None