From 8f3cfe1985f5fa17aef3d37a5b8d37ae9f99dbaf Mon Sep 17 00:00:00 2001 From: Emasoft <713559+Emasoft@users.noreply.github.com> Date: Sat, 17 May 2025 04:53:53 +0200 Subject: [PATCH] Added logic to split and batch the changes in architect mode. In this way, editor models with small input context limits will be able to handle the code changes just as well as the more expensive models. Added option to use the new batch editing feature or to stick to the original architect workflow that makes the changes in one go. Added parameters `--use-batch-editing` and `--no-use-batch-editing` as command-line arguments. Added option `use-batch-editing` to the YAML configuration file. --- .gitignore | 4 +- .ropeproject/.gitkeep | 0 aider/args.py | 6 ++ aider/coders/architect_coder.py | 174 ++++++++++++++++++++++++++++-- aider/coders/base_coder.py | 1 + aider/main.py | 1 + tests/basic/test_batch_editing.py | 103 ++++++++++++++++++ 7 files changed, 279 insertions(+), 10 deletions(-) create mode 100644 .ropeproject/.gitkeep create mode 100644 tests/basic/test_batch_editing.py diff --git a/.gitignore b/.gitignore index 8ad33fd3c..5d4e3891a 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,6 @@ aider/_version.py .venv/ .#* .gitattributes -tmp.benchmarks/ \ No newline at end of file +tmp.benchmarks/ +uv.lock +CLAUDE.md diff --git a/.ropeproject/.gitkeep b/.ropeproject/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/aider/args.py b/aider/args.py index 08c9bde76..49d4edb9d 100644 --- a/aider/args.py +++ b/aider/args.py @@ -178,6 +178,12 @@ def get_parser(default_config_files, git_root): default=True, help="Enable/disable automatic acceptance of architect changes (default: True)", ) + group.add_argument( + "--use-batch-editing", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable batch editing for architect mode (default: False)", + ) group.add_argument( "--weak-model", metavar="WEAK_MODEL", diff --git a/aider/coders/architect_coder.py 
b/aider/coders/architect_coder.py index f3e2a38b1..b7ddddd48 100644 --- a/aider/coders/architect_coder.py +++ b/aider/coders/architect_coder.py @@ -7,6 +7,13 @@ class ArchitectCoder(AskCoder): edit_format = "architect" gpt_prompts = ArchitectPrompts() auto_accept_architect = False + use_batch_editing = False + + def __init__(self, main_model, io, use_batch_editing=False, auto_accept_architect=None, **kwargs): + super().__init__(main_model, io, **kwargs) + if auto_accept_architect is not None: + self.auto_accept_architect = auto_accept_architect + self.use_batch_editing = use_batch_editing def reply_completed(self): content = self.partial_response_content @@ -34,15 +41,164 @@ class ArchitectCoder(AskCoder): new_kwargs = dict(io=self.io, from_coder=self) new_kwargs.update(kwargs) - editor_coder = Coder.create(**new_kwargs) - editor_coder.cur_messages = [] - editor_coder.done_messages = [] + # Use the instance attribute for use_batch_editing - if self.verbose: - editor_coder.show_announcements() + if self.use_batch_editing: + # split the architect model response into chunks using natural delimiters (code blocks, newlines, separators, etc.) 
+ chunks = [] + chunks = self.split_response_by_natural_delimiters(content) - editor_coder.run(with_message=content, preproc=False) + for chunk in chunks: + if not chunk.strip(): + continue + + # Create a new chat session with the editor coder llm model for each chunk of the architect model response + editor_coder = Coder.create(**new_kwargs) + editor_coder.cur_messages = [] + editor_coder.done_messages = [] + + if self.verbose: + editor_coder.show_announcements() + + editor_coder.run(with_message=chunk, preproc=False) + + self.move_back_cur_messages("I made those changes to the files.") + self.total_cost += editor_coder.total_cost + if self.aider_commit_hashes is None: + self.aider_commit_hashes = set() + self.aider_commit_hashes.update(editor_coder.aider_commit_hashes or set()) + else: + # Create only one chat session with the editor coder llm model, not splitting the architect answer in chunks. + editor_coder = Coder.create(**new_kwargs) + editor_coder.cur_messages = [] + editor_coder.done_messages = [] + + if self.verbose: + editor_coder.show_announcements() + + # Run the editor coder with the entire architect model response + editor_coder.run(with_message=content, preproc=False) + + self.move_back_cur_messages("I made those changes to the files.") + self.total_cost = editor_coder.total_cost + self.aider_commit_hashes = editor_coder.aider_commit_hashes + + + def split_response_by_natural_delimiters(self, content): + """ + Splits the content into chunks using natural delimiters, with heuristics: + - Never splits inside code blocks (even nested/mixed fences). + - Detects repeated block patterns (title/tag, blank lines, filename, code block) and splits accordingly. + - Lone comments between blocks are included in both adjacent chunks. + - Groups filename fences with their following code block. + - Groups delimiters/tags with their following block, including blank lines. + - Falls back to delimiter/tag splitting if no repeated pattern is found. 
+ """ + import re + + # Fence definitions + fence_openers = [ + r"```[\w-]*", r"~~~~[\w-]*", + r"", r"
", r"", r"", r"", r"", r""
+        ]
+        fence_closers = [
+            r"```", r"~~~~",
+            r"", r"
", r"", r"", r"", r"", r"" + ] + fence_opener_re = re.compile(rf"^({'|'.join(fence_openers)})\s*$", re.IGNORECASE) + fence_closer_re = re.compile(rf"^({'|'.join(fence_closers)})\s*$", re.IGNORECASE) + + # Patterns for tags/titles, filenames, comments, and delimiters + tag_pattern = re.compile( + r"""( + ^\[[A-Z0-9 _:\-./()]+\]$ | # [ALL CAPS/NUMERIC/UNDERSCORE/ETC] + ^<[\w\s:\-./()|=\[\]!]+>$ | # + ^<<[\w\s:\-./()|=\[\]!]+>>$ | # <> + ^<\|[\w\s:\-./()|=\[\]!]+\|>$ | # <|TAG ...|> + ^<=.*=>$ | # <=...=> + ^$ | # + ^<==\|.*\|==>$ # <==| ... |==> + )""", + re.MULTILINE | re.VERBOSE + ) + filename_pattern = re.compile(r"^[\w\./\\\-]+\.?\w*$") + comment_pattern = re.compile(r"^(#|