From 8f3cfe1985f5fa17aef3d37a5b8d37ae9f99dbaf Mon Sep 17 00:00:00 2001
From: Emasoft <713559+Emasoft@users.noreply.github.com>
Date: Sat, 17 May 2025 04:53:53 +0200
Subject: [PATCH] Added logic to split and batch the changes in architect mode.
 In this way editor models with small input context limits will be able to
 handle the code changes just as well as the more expensive models.

Added an option to use the new batch editing feature or to stick to the
original architect workflow that makes the changes in one go.

Added the `--use-batch-editing` and `--no-use-batch-editing` flags as
command line arguments.

Added the `use-batch-editing` option to the YAML configuration file.
---
 .gitignore                        |   4 +-
 .ropeproject/.gitkeep             |   0
 aider/args.py                     |   6 ++
 aider/coders/architect_coder.py   | 174 ++++++++++++++++++++++++++++--
 aider/coders/base_coder.py        |   1 +
 aider/main.py                     |   1 +
 tests/basic/test_batch_editing.py | 103 ++++++++++++++++++
 7 files changed, 279 insertions(+), 10 deletions(-)
 create mode 100644 .ropeproject/.gitkeep
 create mode 100644 tests/basic/test_batch_editing.py

diff --git a/.gitignore b/.gitignore
index 8ad33fd3c..5d4e3891a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,6 @@ aider/_version.py
 .venv/
 .#*
 .gitattributes
-tmp.benchmarks/
\ No newline at end of file
+tmp.benchmarks/
+uv.lock
+CLAUDE.md
diff --git a/.ropeproject/.gitkeep b/.ropeproject/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/aider/args.py b/aider/args.py
index 08c9bde76..49d4edb9d 100644
--- a/aider/args.py
+++ b/aider/args.py
@@ -178,6 +178,12 @@ def get_parser(default_config_files, git_root):
         default=True,
         help="Enable/disable automatic acceptance of architect changes (default: True)",
     )
+    group.add_argument(
+        "--use-batch-editing",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="Enable/disable batch editing for architect mode (default: False)",
+    )
     group.add_argument(
         "--weak-model",
         metavar="WEAK_MODEL",
diff --git a/aider/coders/architect_coder.py b/aider/coders/architect_coder.py
index f3e2a38b1..b7ddddd48 100644
--- a/aider/coders/architect_coder.py
+++ b/aider/coders/architect_coder.py
@@ -7,6 +7,13 @@ class ArchitectCoder(AskCoder):
     edit_format = "architect"
     gpt_prompts = ArchitectPrompts()
     auto_accept_architect = False
+    use_batch_editing = False
+    
+    def __init__(self, main_model, io, use_batch_editing=False, auto_accept_architect=None, **kwargs):
+        super().__init__(main_model, io, **kwargs)  # use_batch_editing / auto_accept_architect are consumed here, not forwarded
+        if auto_accept_architect is not None:  # None: leave the attribute's current value untouched
+            self.auto_accept_architect = auto_accept_architect
+        self.use_batch_editing = use_batch_editing  # read by reply_completed to pick the batch or single-pass path
 
     def reply_completed(self):
         content = self.partial_response_content
@@ -34,15 +41,164 @@ class ArchitectCoder(AskCoder):
         new_kwargs = dict(io=self.io, from_coder=self)
         new_kwargs.update(kwargs)
 
-        editor_coder = Coder.create(**new_kwargs)
-        editor_coder.cur_messages = []
-        editor_coder.done_messages = []
+        # Use the instance attribute for use_batch_editing
 
-        if self.verbose:
-            editor_coder.show_announcements()
+        if self.use_batch_editing:
+            # Split the architect model response into chunks using natural delimiters (code blocks, newlines, separators, etc.)
+            chunks = []
+            chunks = self.split_response_by_natural_delimiters(content)
 
-        editor_coder.run(with_message=content, preproc=False)
+            for chunk in chunks:
+                if not chunk.strip():
+                    continue
+
+                # Create a new chat session with the editor coder llm model for each chunk of the architect model response
+                editor_coder = Coder.create(**new_kwargs)
+                editor_coder.cur_messages = []
+                editor_coder.done_messages = []
+
+                if self.verbose:
+                    editor_coder.show_announcements()
+
+                editor_coder.run(with_message=chunk, preproc=False)
+
+                self.move_back_cur_messages("I made those changes to the files.")
+                self.total_cost += editor_coder.total_cost
+                if self.aider_commit_hashes is None:
+                    self.aider_commit_hashes = set()
+                self.aider_commit_hashes.update(editor_coder.aider_commit_hashes or set())
+        else:
+            # Create only one chat session with the editor coder llm model, not splitting the architect answer in chunks.
+            editor_coder = Coder.create(**new_kwargs)
+            editor_coder.cur_messages = []
+            editor_coder.done_messages = []
+
+            if self.verbose:
+                editor_coder.show_announcements()
+
+            # Run the editor coder with the entire architect model response
+            editor_coder.run(with_message=content, preproc=False)
+
+            self.move_back_cur_messages("I made those changes to the files.")
+            self.total_cost = editor_coder.total_cost
+            self.aider_commit_hashes = editor_coder.aider_commit_hashes
+
+
+    def split_response_by_natural_delimiters(self, content):
+        """
+        Split an architect reply into chunks that can be sent to the editor one by one.
+
+        Heuristics:
+        - Never splits inside code blocks (markdown fences or <code>-style tags, nested or not).
+        - A repeated "tag/title, blank lines, optional filename, code block" layout starts a
+          new chunk at every tag; text before the first tag stays with the first chunk.
+        - Falls back to splitting at tag lines and delimiter lines (dash or "=" rules,
+          markdown headings) when no such layout is found.
+        - Comment lines that end a chunk are repeated at the top of the next chunk.
+
+        Returns the chunks in order, leaving out whitespace-only ones. Apart from those and
+        the repeated comment lines, the chunks concatenate back to ``content``.
+        """
+        import re
+
+        # Fence definitions. Group 1 or 2 holds the fence family (marker run or tag name).
+        fence_tags = "code|pre|source|codeblock|sourcecode|diff|diff-fenced"
+        opener_re = re.compile(rf"^(?:(```|~~~~)[\w-]*|<({fence_tags})>)\s*$", re.IGNORECASE)
+        closer_re = re.compile(rf"^(?:(```|~~~~)|</({fence_tags})>)\s*$", re.IGNORECASE)
+
+        # Patterns for tags/titles, filenames, comments, and delimiters
+        tag_pattern = re.compile(
+            r"""(
+                ^\[[A-Z0-9 _:\-./()]+\]$ |                 # [ALL CAPS/NUMERIC/UNDERSCORE/ETC]
+                ^<[\w\s:\-./()|=\[\]!]+>$ |                # <TAG ...>
+                ^<<[\w\s:\-./()|=\[\]!]+>>$ |              # <<TAG ...>>
+                ^<\|[\w\s:\-./()|=\[\]!]+\|>$ |            # <|TAG ...|>
+                ^<=.*=>$ |                                 # <=...=>
+                ^<!.*!>$ |                                 # <!....!>
+                ^<==\|.*\|==>$                             # <==| ... |==>
+            )""",
+            re.MULTILINE | re.VERBOSE
+        )
+        filename_pattern = re.compile(r"^[\w\./\\\-]+\.?\w*$")
+        comment_pattern = re.compile(r"^(#|<!--).*")
+        # Not re.VERBOSE: there an unescaped "#" starts a comment and would truncate the pattern.
+        delimiter_pattern = re.compile(r"^(?:-{3,}|={3,}|\s*#+\s.*)\s*$")
+
+        lines = content.splitlines(keepends=True)
+        n = len(lines)
+
+        def fence_kind(pattern, line):
+            """Return the lower-cased fence family if pattern matches line, else None."""
+            found = pattern.match(line)
+            return (found.group(1) or found.group(2)).lower() if found else None
+
+        def skip_blanks(pos):
+            """Return pos advanced past at most two blank lines."""
+            stop = min(n, pos + 2)
+            while pos < stop and not lines[pos].strip():
+                pos += 1
+            return pos
+
+        def skip_fence(pos):
+            """Return the index just past the code block opened at lines[pos]."""
+            stack = [fence_kind(opener_re, lines[pos])]
+            pos += 1
+            while pos < n and stack:
+                # Closers first: a bare fence line also matches the opener pattern.
+                if fence_kind(closer_re, lines[pos]) == stack[-1]:
+                    stack.pop()
+                elif fence_kind(opener_re, lines[pos]):
+                    stack.append(fence_kind(opener_re, lines[pos]))
+                pos += 1
+            return pos
+
+        # Step 1: find block starts with the repeated pattern heuristic:
+        # up to 2 blank lines, a tag/title, up to 2 blank lines, optional filename, fence opener
+        block_starts = []
+        i = 0
+        while i < n:
+            j = skip_blanks(i)
+            if j < n and tag_pattern.match(lines[j]):
+                j = skip_blanks(j + 1)
+                if j < n and filename_pattern.match(lines[j].strip()):
+                    j += 1
+                if j < n and fence_kind(opener_re, lines[j]):
+                    block_starts.append(i)
+                    i = skip_fence(j)
+                    continue
+            i += 1
+
+        if block_starts:
+            # Keep any text that precedes the first tag instead of discarding it
+            block_starts[0] = 0
+        else:
+            # No repeated pattern: fall back to tags/delimiters found outside code blocks
+            block_starts = [0]
+            i = 0
+            while i < n:
+                if fence_kind(opener_re, lines[i]):
+                    i = skip_fence(i)
+                    continue
+                if i and (tag_pattern.match(lines[i]) or delimiter_pattern.match(lines[i])):
+                    block_starts.append(i)
+                i += 1
+
+        # Step 2: cut the chunks; comment lines ending one chunk are repeated in the next.
+        # They are carried in a local list: inserting them into lines would shift every
+        # index recorded in block_starts.
+        chunks = []
+        carried = []
+        for idx, start in enumerate(block_starts):
+            end = block_starts[idx + 1] if idx + 1 < len(block_starts) else n
+            chunk_lines = lines[start:end]
+            chunk = "".join(carried + chunk_lines)
+            if chunk.strip():
+                chunks.append(chunk)
+            carried = []
+            for line in reversed(chunk_lines):
+                if not comment_pattern.match(line):
+                    break
+                carried.insert(0, line)
+
+        return chunks
 
-        self.move_back_cur_messages("I made those changes to the files.")
-        self.total_cost = editor_coder.total_cost
-        self.aider_commit_hashes = editor_coder.aider_commit_hashes
diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index 5dbe03cf9..0c696c7d8 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -335,6 +335,7 @@ class Coder:
         file_watcher=None,
         auto_copy_context=False,
         auto_accept_architect=True,
+        use_batch_editing=False,
     ):
         # Fill in a dummy Analytics if needed, but it is never .enable()'d
         self.analytics = analytics if analytics is not None else Analytics()
diff --git a/aider/main.py b/aider/main.py
index ea344f0ba..12f437167 100644
--- a/aider/main.py
+++ b/aider/main.py
@@ -996,6 +996,7 @@ def main(argv=None, input=None, output=None, force_git_root=None, return_coder=F
             detect_urls=args.detect_urls,
             auto_copy_context=args.copy_paste,
             auto_accept_architect=args.auto_accept_architect,
+            use_batch_editing=args.use_batch_editing,
         )
     except UnknownEditFormat as err:
         io.tool_error(str(err))
diff --git a/tests/basic/test_batch_editing.py b/tests/basic/test_batch_editing.py
new file mode 100644
index 000000000..149c6a3ad
--- /dev/null
+++ b/tests/basic/test_batch_editing.py
@@ -0,0 +1,103 @@
+import unittest
+from unittest.mock import MagicMock, patch
+
+from aider.coders.architect_coder import ArchitectCoder
+from aider.io import InputOutput
+from aider.models import Model
+
+
+class TestBatchEditing(unittest.TestCase):
+    def setUp(self):
+        self.GPT35 = Model("gpt-3.5-turbo")
+        self.webbrowser_patcher = patch("aider.io.webbrowser.open")
+        self.mock_webbrowser = self.webbrowser_patcher.start()
+
+    def tearDown(self):
+        self.webbrowser_patcher.stop()
+
+    def test_batch_editing_default_value(self):
+        """Test that the default value for use_batch_editing is False"""
+        # Create an architect coder with default settings
+        io = InputOutput(yes=True)
+        with patch("aider.coders.architect_coder.AskCoder.__init__", return_value=None):
+            coder = ArchitectCoder(main_model=self.GPT35, io=io)
+            
+            # Check that the default value is False
+            self.assertFalse(coder.use_batch_editing)
+
+    def test_batch_editing_parameter_passing(self):
+        """Test that the use_batch_editing parameter is correctly passed to the ArchitectCoder"""
+        io = InputOutput(yes=True)
+        
+        # Test with explicit True setting
+        with patch("aider.coders.architect_coder.AskCoder.__init__", return_value=None):
+            coder = ArchitectCoder(main_model=self.GPT35, io=io, use_batch_editing=True)
+            self.assertTrue(coder.use_batch_editing)
+        
+        # Test with explicit False setting
+        with patch("aider.coders.architect_coder.AskCoder.__init__", return_value=None):
+            coder = ArchitectCoder(main_model=self.GPT35, io=io, use_batch_editing=False)
+            self.assertFalse(coder.use_batch_editing)
+
+    def test_batch_editing_usage_in_reply_completed(self):
+        """Test that the use_batch_editing attribute controls the flow in reply_completed"""
+        io = InputOutput(yes=True)
+        io.confirm_ask = MagicMock(return_value=True)
+        
+        # Create an ArchitectCoder with default arguments; use_batch_editing is toggled further down
+        with patch("aider.coders.architect_coder.AskCoder.__init__", return_value=None):
+            coder = ArchitectCoder(main_model=self.GPT35, io=io)
+            # Set up the necessary attributes manually
+            coder.io = io  # Need to set this explicitly since we're mocking __init__
+            coder.main_model = self.GPT35
+            coder.auto_accept_architect = True
+            coder.verbose = False
+            coder.total_cost = 0
+            coder.cur_messages = []
+            coder.done_messages = []
+            coder.aider_commit_hashes = None
+            coder.move_back_cur_messages = MagicMock()
+            
+            # Mock the split_response_by_natural_delimiters method
+            coder.split_response_by_natural_delimiters = MagicMock()
+            coder.split_response_by_natural_delimiters.return_value = ["chunk1", "chunk2"]
+            
+            # Mock editor_coder creation and execution
+            mock_editor = MagicMock()
+            mock_editor.total_cost = 0
+            mock_editor.aider_commit_hashes = set()
+            
+            # Test with use_batch_editing=True
+            coder.use_batch_editing = True
+            with patch("aider.coders.architect_coder.Coder.create", return_value=mock_editor):
+                # Set partial response content
+                coder.partial_response_content = "Make these changes to the code"
+                
+                # Call reply_completed
+                coder.reply_completed()
+                
+                # Verify split_response_by_natural_delimiters was called
+                coder.split_response_by_natural_delimiters.assert_called_once_with("Make these changes to the code")
+                
+                # Verify the editor coder's run() was invoked twice (once for each chunk)
+                self.assertEqual(mock_editor.run.call_count, 2)
+            
+            # Reset mocks
+            coder.split_response_by_natural_delimiters.reset_mock()
+            mock_editor.run.reset_mock()
+            
+            # Test with use_batch_editing=False
+            coder.use_batch_editing = False
+            with patch("aider.coders.architect_coder.Coder.create", return_value=mock_editor):
+                # Call reply_completed
+                coder.reply_completed()
+                
+                # Verify split_response_by_natural_delimiters was NOT called
+                coder.split_response_by_natural_delimiters.assert_not_called()
+                
+                # Verify the editor coder's run() was invoked once for the entire content
+                mock_editor.run.assert_called_once()
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file