From 68bd8884f7e836b4cf2a9e4c47645b150d2a312c Mon Sep 17 00:00:00 2001
From: yozerpp
Date: Tue, 21 Jan 2025 02:49:45 +0300
Subject: [PATCH 1/3] feat: Add /iterate command

---
 aider/coders/__init__.py      |  3 +-
 aider/coders/iterate_coder.py | 74 +++++++++++++++++++++++++++++
 aider/commands.py             |  4 +-
 tests/basic/test_iterate.py   | 87 +++++++++++++++++++++++++++++++++++
 4 files changed, 166 insertions(+), 2 deletions(-)
 create mode 100644 aider/coders/iterate_coder.py
 create mode 100644 tests/basic/test_iterate.py

diff --git a/aider/coders/__init__.py b/aider/coders/__init__.py
index e9d334bc9..21e2fad5b 100644
--- a/aider/coders/__init__.py
+++ b/aider/coders/__init__.py
@@ -8,7 +8,7 @@ from .editor_whole_coder import EditorWholeFileCoder
 from .help_coder import HelpCoder
 from .udiff_coder import UnifiedDiffCoder
 from .wholefile_coder import WholeFileCoder
-
+from .iterate_coder import IterateCoder
 # from .single_wholefile_func_coder import SingleWholeFileFunctionCoder

 __all__ = [
@@ -17,6 +17,7 @@ __all__ = [
     Coder,
     EditBlockCoder,
     EditBlockFencedCoder,
+    IterateCoder,
     WholeFileCoder,
     UnifiedDiffCoder,
     # SingleWholeFileFunctionCoder,
diff --git a/aider/coders/iterate_coder.py b/aider/coders/iterate_coder.py
new file mode 100644
index 000000000..666b52c37
--- /dev/null
+++ b/aider/coders/iterate_coder.py
@@ -0,0 +1,74 @@
+from typing import Tuple
+import copy
+
+from aider.coders.base_coder import Coder
+"""Perform a coding task on multiple files in batches that fit the context and output token limits, without sending them all at once."""
+class IterateCoder(Coder):
+    coder : Coder = None
+    original_kwargs: dict = None
+    edit_format = "iterate"
+
+    def __init__(self, main_model, io, **kwargs):
+        super().__init__(main_model, io,**kwargs)
+
+    def run_one(self, user_message, preproc):
+        if self.coder is None:
+            self.coder = Coder.create(main_model=self.main_model, edit_format=self.main_model.edit_format,from_coder=self,**self.original_kwargs)
+        remaining_files_with_type_length : list[Tuple[str,bool,int]]=[]
+        for f in self.abs_fnames:
+            remaining_files_with_type_length.append((f, True, self.main_model.token_count(self.io.read_text(f))))
+        for f in self.abs_read_only_fnames:
+            remaining_files_with_type_length.append((f,False,self.main_model.token_count(self.io.read_text(f))))
+        max_tokens = self.main_model.info.get('max_tokens')
+        max_context = self.main_model.info['max_input_tokens']
+        max_output = self.main_model.info['max_output_tokens']
+        repo_token_count = self.main_model.get_repo_map_tokens()
+        history_token_count = sum([tup[0] for tup in self.summarizer.tokenize( [msg["content"] for msg in self.done_messages])])
+        """Fit input files + chat history + repo_map + files_to_send within the context limit,
+        and files_to_send within the output limit.
+        Output files are assumed to be greater in size than the input files."""
+        prev_io = self.io.yes
+        self.io.yes = True
+        for files_to_send_with_types in self.file_cruncher( max_context=max_context,
+                max_output= max_tokens if max_tokens is not None else max_output,
+                context_tokens=repo_token_count + history_token_count,remaining_files=remaining_files_with_type_length):
+            self.coder.done_messages=copy.deepcopy(self.done_messages) #reset history of the coder to the start of the /iterate command
+            self.coder.cur_messages=[]
+            self.coder.abs_fnames=set([f[0] for f in files_to_send_with_types if f[1]])
+            self.coder.abs_read_only_fnames=set(f[0] for f in files_to_send_with_types if not f[1])
+            self.coder.run_one(user_message,preproc)
+        self.io.yes = prev_io
+    class file_cruncher:
+        context_tokens: int
+        max_context:int
+        max_output:int
+        remaining_files : list[Tuple[str,bool,int]]
+        PADDING:int = 50
+        def __init__(self,max_context:int,max_output:int,context_tokens,remaining_files : list[Tuple[str,bool,int]] ):
+            self.context_tokens = context_tokens
+            self.max_context = max_context
+            self.max_output = max_output
+            self.remaining_files = sorted(remaining_files, key = lambda x: x[2])
+        def __iter__(self):
+            return self
+        def __next__(self):
+            if len(self.remaining_files) == 0:
+                raise StopIteration
+            files_to_send : list[Tuple[str,bool]]= []
+            i:int =0
+            total_context= 0
+            total_output= 0
+            for file_name, type_, length in self.remaining_files:
+                if length + (length + self.PADDING) + self.context_tokens + total_context>= self.max_context or length + self.PADDING + total_output >= self.max_output:
+                    break
+                total_context+=length + length + self.PADDING
+                total_output+=length + self.PADDING
+                files_to_send.append((file_name,type_))
+                i+=1
+            if i == 0: #no file fits the limits, roll the dice and let the user deal with it
+                f,t,_ = self.remaining_files[i]
+                files_to_send.append((f,t))
+                i=1
+            self.remaining_files = self.remaining_files[i:]
+            return files_to_send
+
\ No newline at end of file
diff --git a/aider/commands.py b/aider/commands.py
index a276b66cb..a6175b056 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -1059,7 +1059,9 @@ class Commands:
             map_mul_no_files=map_mul_no_files,
             show_announcements=False,
         )
-
+    def cmd_iterate(self, args):
+        """Iteratively perform the requested change on files, in batches that fit the context and output limits"""
+        return self._generic_chat_command(args, "iterate")
     def cmd_ask(self, args):
         """Ask questions about the code base without editing any files. If no prompt provided, switches to ask mode.""" # noqa
         return self._generic_chat_command(args, "ask")
diff --git a/tests/basic/test_iterate.py b/tests/basic/test_iterate.py
new file mode 100644
index 000000000..d1b2b791b
--- /dev/null
+++ b/tests/basic/test_iterate.py
@@ -0,0 +1,87 @@
+import os
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+from aider.coders import Coder
+from aider.io import InputOutput
+from aider.models import Model
+from aider.utils import GitTemporaryDirectory
+
+
+class TestIterateCoder(unittest.TestCase):
+    def setUp(self):
+        self.GPT35 = Model("gpt-3.5-turbo")
+        self.io = InputOutput(yes=True)
+        # self.webbrowser_patcher = patch("aider.io.webbrowser.open")
+        # self.mock_webbrowser = self.webbrowser_patcher.start()
+
+        # Get all Python files in aider/coders directory
+        coders_dir = Path(__file__).parent.parent.parent / "aider" / "coders"
+        self.files = [str(f) for f in coders_dir.glob("*.py") if f.is_file()]
+
+        # Create coder with all files
+        self.coder = Coder.create(
+            main_model=self.GPT35,
+            io=self.io,
+            fnames=self.files,
+            edit_format='iterate'
+        )
+
+    def tearDown(self):
+        # self.webbrowser_patcher.stop()
+        return
+    """Tests that:
+    - Every request retains the chat history up to the /iterate command, but not the history of other iterations.
+    - Added files and the chat history up to the /iterate command are unmodified.
+    - Every file is processed (even if a single file exceeds the limits by itself), with no duplicate processing.
+    """
+    def test_iterate_resets_history_and_processes_all_files(self):
+        processed_files: list[str] = []
+        original_context: list[dict[str, str]]
+        prev_file_names: list[str] = None
+        # Track messages sent to LLM and files processed
+        def mock_send(self, messages, model=None, functions=None):
+            nonlocal original_context
+            nonlocal processed_files
+            nonlocal prev_file_names
+            for original_message in original_context:
+                assert original_message in messages, "Chat history before the start of the command is not retained."
+            # Simulate response mentioning filename
+            a : str=""
+            files_message = [msg['content'] for msg in messages if "*added these files to the chat*" in msg['content']][0]
+            from re import findall
+            file_names = findall(r'.*\n(\S+\.py)\n```.*',files_message)
+            for f_name in file_names:
+                assert prev_file_names is None or f_name not in prev_file_names, "Files from previous iterations haven't been cleaned up."
+            prev_file_names = file_names
+            processed_files.extend(file_names)
+            # Return minimal response
+            self.partial_response_content = "Done."
+            self.partial_response_function_call=dict()
+
+        with GitTemporaryDirectory():
+            # Mock the send method
+            with patch.object(Coder, 'send',new_callable=lambda: mock_send):
+                self.coder.coder = Coder.create(main_model=self.coder.main_model, edit_format=self.coder.main_model.edit_format,from_coder=self.coder,**self.coder.original_kwargs)
+
+                # Add initial conversation history
+                original_context = self.coder.done_messages = [
+                    {"role": "user", "content": "Initial conversation"},
+                    {"role": "assistant", "content": "OK"}
+                ]
+
+                # Run iterate command
+                self.coder.run(with_message="Process all files")
+                # Verify all files were processed
+                input_basenames = {Path(f).name for f in self.files}
+                processed_basenames = {Path(f).name for f in processed_files}
+                missing = input_basenames - processed_basenames
+                assert not missing, f"Files not processed: {missing}"
+
+                # Verify history preservation and structure
+                assert len(self.coder.done_messages) == 2, "Original chat history was modified"
+                # Verify final file state
+                assert len(self.coder.abs_fnames) == len(self.files), "Not all files remained in chat"
+
+if __name__ == "__main__":
+    unittest.main()

From 7e232c5823722bccbac53e5ff6b36215708eb30a Mon Sep 17 00:00:00 2001
From: yozerpp
Date: Tue, 21 Jan 2025 17:45:13 +0300
Subject: [PATCH 2/3] feat: add post-completion batch linting and committing to
 the /iterate command; refactor

---
 aider/coders/iterate_coder.py | 100 +++++++++++++++++++++++-----------
 tests/basic/test_iterate.py   |   5 +-
 2 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/aider/coders/iterate_coder.py b/aider/coders/iterate_coder.py
index 666b52c37..b55d0718a 100644
--- a/aider/coders/iterate_coder.py
+++ b/aider/coders/iterate_coder.py
@@ -1,7 +1,8 @@
-from typing import Tuple
+from typing import Tuple, override
 import copy

 from aider.coders.base_coder import Coder
+from aider.coders.base_prompts import CoderPrompts
 """Perform a coding task on multiple files in batches that fit the context and output token limits, without sending them all at once."""
 class IterateCoder(Coder):
     coder : Coder = None
@@ -10,65 +11,100 @@ class IterateCoder(Coder):

     def __init__(self, main_model, io, **kwargs):
         super().__init__(main_model, io,**kwargs)
-
+        if 'gpt_prompts' not in kwargs: self.gpt_prompts = CoderPrompts()
+    @override
     def run_one(self, user_message, preproc):
         if self.coder is None:
             self.coder = Coder.create(main_model=self.main_model, edit_format=self.main_model.edit_format,from_coder=self,**self.original_kwargs)
-        remaining_files_with_type_length : list[Tuple[str,bool,int]]=[]
-        for f in self.abs_fnames:
-            remaining_files_with_type_length.append((f, True, self.main_model.token_count(self.io.read_text(f))))
-        for f in self.abs_read_only_fnames:
-            remaining_files_with_type_length.append((f,False,self.main_model.token_count(self.io.read_text(f))))
+        self.coder.auto_lint, self.coder.auto_commits = (False,False)
+        chat_files_with_type_and_length = self.get_chat_files_with_type_and_length()
         max_tokens = self.main_model.info.get('max_tokens')
         max_context = self.main_model.info['max_input_tokens']
-        max_output = self.main_model.info['max_output_tokens']
+        max_output = max_tokens if max_tokens is not None else self.main_model.info['max_output_tokens']
         repo_token_count = self.main_model.get_repo_map_tokens()
         history_token_count = sum([tup[0] for tup in self.summarizer.tokenize( [msg["content"] for msg in self.done_messages])])
-        """Fit input files + chat history + repo_map + files_to_send within the context limit,
-        and files_to_send within the output limit.
-        Output files are assumed to be greater in size than the input files."""
-        prev_io = self.io.yes
+        prev_io= self.io.yes # shell commands will still need confirmation for each command; this can be overridden by extending the InputOutput class and overriding the confirm_ask method.
         self.io.yes = True
-        for files_to_send_with_types in self.file_cruncher( max_context=max_context,
-                max_output= max_tokens if max_tokens is not None else max_output,
-                context_tokens=repo_token_count + history_token_count,remaining_files=remaining_files_with_type_length):
-            self.coder.done_messages=copy.deepcopy(self.done_messages) #reset history of the coder to the start of the /iterate command
-            self.coder.cur_messages=[]
-            self.coder.abs_fnames=set([f[0] for f in files_to_send_with_types if f[1]])
-            self.coder.abs_read_only_fnames=set(f[0] for f in files_to_send_with_types if not f[1])
-            self.coder.run_one(user_message,preproc)
-        self.io.yes = prev_io
+        cruncher = self.file_cruncher( max_context, max_output,repo_token_count + history_token_count,
+                chat_files_with_type_and_length)
+        edited_files = self.batch_process(user_message,preproc, cruncher)
+        self.io.yes= prev_io
+        if len(edited_files) == 0: return
+        if self.auto_lint:
+            cruncher.files_to_crunch = [(fname,True,self.main_model.token_count(self.io.read_text(fname))) for fname in edited_files]
+            self.batch_lint(cruncher,preproc)
+        if self.auto_commits:
+            self.batch_commit(edited_files)
+
+    def get_chat_files_with_type_and_length(self):
+        chat_files_with_type_and_length : list[Tuple[str,bool,int]]=[]
+        for f in self.abs_fnames:
+            chat_files_with_type_and_length.append((f, True, self.main_model.token_count(self.io.read_text(f))))
+        for f in self.abs_read_only_fnames:
+            chat_files_with_type_and_length.append((f,False,self.main_model.token_count(self.io.read_text(f))))
+        return chat_files_with_type_and_length
+
+    def batch_process(self,message,preproc, cruncher):
+        edited_files= []
+        for files_to_send_with_types in cruncher:
+            self.prepare_batch(files_to_send_with_types)
+            self.coder.run_one(message,preproc)
+            edited_files.extend(self.coder.aider_edited_files)
+            self.coder.aider_edited_files = set()
+        return edited_files
+
+    def prepare_batch(self,files_to_send_with_types : list[Tuple[str,bool]]):
+        self.coder.done_messages = copy.deepcopy(self.done_messages) # reset the coder's history to the state at the start of the /iterate command
+        self.coder.cur_messages = []
+        self.coder.abs_fnames=set([f[0] for f in files_to_send_with_types if f[1]])
+        self.coder.abs_read_only_fnames=set(f[0] for f in files_to_send_with_types if not f[1])
+    def batch_lint(self, cruncher,preproc):
+        for files_with_type in cruncher:
+            files = [ft[0] for ft in files_with_type]
+            lint_msg = self.coder.lint_edited(files)
+            self.auto_commit(files,context="Ran the linter")
+            if lint_msg:
+                ok = self.io.confirm_ask("Attempt to fix lint errors?", subject="batch_lint", allow_never=True)
+                if ok:
+                    self.coder.done_messages, self.coder.cur_messages = ([],[])
+                    self.coder.run_one(lint_msg,preproc)
+    def batch_commit(self, files : list[str]):
+        self.repo.commit(files)
+
     class file_cruncher:
         context_tokens: int
         max_context:int
         max_output:int
-        remaining_files : list[Tuple[str,bool,int]]
+        files_to_crunch : list[Tuple[str,bool,int]]
         PADDING:int = 50
-        def __init__(self,max_context:int,max_output:int,context_tokens,remaining_files : list[Tuple[str,bool,int]] ):
+        def __init__(self,max_context:int,max_output:int,context_tokens,files_to_crunch : list[Tuple[str,bool,int]] ):
             self.context_tokens = context_tokens
             self.max_context = max_context
             self.max_output = max_output
-            self.remaining_files = sorted(remaining_files, key = lambda x: x[2])
+            self.files_to_crunch = sorted(files_to_crunch, key = lambda x: x[2])
         def __iter__(self):
-            return self
+            return self
+        """Fit input files + chat history + repo_map + files_to_send within the context limit,
+        and files_to_send within the output limit.
+        Output files are assumed to be half the size of the input files."""
         def __next__(self):
-            if len(self.remaining_files) == 0:
+            if len(self.files_to_crunch) == 0:
                 raise StopIteration
             files_to_send : list[Tuple[str,bool]]= []
            i:int =0
             total_context= 0
             total_output= 0
-            for file_name, type_, length in self.remaining_files:
-                if length + (length + self.PADDING) + self.context_tokens + total_context>= self.max_context or length + self.PADDING + total_output >= self.max_output:
+            for file_name, type_, length in self.files_to_crunch:
+                if length + length / 2 + self.PADDING + self.context_tokens + total_context >= self.max_context or length / 2 + self.PADDING + total_output >= self.max_output:
                     break
-                total_context+=length + length + self.PADDING
-                total_output+=length + self.PADDING
+                total_context+=length + length / 2 + self.PADDING
+                total_output+=length / 2 + self.PADDING
                 files_to_send.append((file_name,type_))
                 i+=1
             if i == 0: #no file fits the limits, roll the dice and let the user deal with it
-                f,t,_ = self.remaining_files[i]
-                files_to_send.append((f,t))
+                f,t,_ = self.files_to_crunch[i]
+                files_to_send.append((copy.copy(f), t))
                 i=1
-            self.remaining_files = self.remaining_files[i:]
+            self.files_to_crunch = self.files_to_crunch[i:]
             return files_to_send
-
\ No newline at end of file
+
diff --git a/tests/basic/test_iterate.py b/tests/basic/test_iterate.py
index d1b2b791b..07e514811 100644
--- a/tests/basic/test_iterate.py
+++ b/tests/basic/test_iterate.py
@@ -5,6 +5,7 @@ from unittest.mock import MagicMock, patch
 from aider.coders import Coder
 from aider.io import InputOutput
 from aider.models import Model
+from aider.repo import GitRepo
 from aider.utils import GitTemporaryDirectory


@@ -47,7 +48,6 @@ class TestIterateCoder(unittest.TestCase):
             for original_message in original_context:
                 assert original_message in messages, "Chat history before the start of the command is not retained."
             # Simulate response mentioning filename
-            a : str=""
             files_message = [msg['content'] for msg in messages if "*added these files to the chat*" in msg['content']][0]
             from re import findall
             file_names = findall(r'.*\n(\S+\.py)\n```.*',files_message)
@@ -61,9 +61,8 @@ class TestIterateCoder(unittest.TestCase):

         with GitTemporaryDirectory():
             # Mock the send method
-            with patch.object(Coder, 'send',new_callable=lambda: mock_send):
+            with (patch.object(Coder, 'send',new_callable=lambda: mock_send), patch.object(Coder, 'lint_edited',lambda *_,**__:None), patch.object(GitRepo,'commit',lambda *_,**__:None)):
                 self.coder.coder = Coder.create(main_model=self.coder.main_model, edit_format=self.coder.main_model.edit_format,from_coder=self.coder,**self.coder.original_kwargs)
-
                 # Add initial conversation history
                 original_context = self.coder.done_messages = [
                     {"role": "user", "content": "Initial conversation"},

From 808b16d27b7c4a0fd7288f90d50c0895f381a430 Mon Sep 17 00:00:00 2001
From: yozerpp
Date: Fri, 25 Apr 2025 18:38:13 +0300
Subject: [PATCH 3/3] refactor: rename /iterate to /batch

---
 aider/coders/__init__.py                          | 4 ++--
 aider/coders/{iterate_coder.py => batch_coder.py} | 4 ++--
 aider/commands.py                                 | 4 ++--
 tests/basic/{test_iterate.py => test_batch.py}    | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)
 rename aider/coders/{iterate_coder.py => batch_coder.py} (99%)
 rename tests/basic/{test_iterate.py => test_batch.py} (93%)

diff --git a/aider/coders/__init__.py b/aider/coders/__init__.py
index 21e2fad5b..f7a9f9ad4 100644
--- a/aider/coders/__init__.py
+++ b/aider/coders/__init__.py
@@ -8,7 +8,7 @@ from .editor_whole_coder import EditorWholeFileCoder
 from .help_coder import HelpCoder
 from .udiff_coder import UnifiedDiffCoder
 from .wholefile_coder import WholeFileCoder
-from .iterate_coder import IterateCoder
+from .batch_coder import BatchCoder
 # from .single_wholefile_func_coder import SingleWholeFileFunctionCoder

 __all__ = [
@@ -17,7 +17,7 @@ __all__ = [
     Coder,
     EditBlockCoder,
     EditBlockFencedCoder,
-    IterateCoder,
+    BatchCoder,
     WholeFileCoder,
     UnifiedDiffCoder,
     # SingleWholeFileFunctionCoder,
diff --git a/aider/coders/iterate_coder.py b/aider/coders/batch_coder.py
similarity index 99%
rename from aider/coders/iterate_coder.py
rename to aider/coders/batch_coder.py
index b55d0718a..c7a0bcbc0 100644
--- a/aider/coders/iterate_coder.py
+++ b/aider/coders/batch_coder.py
@@ -4,10 +4,10 @@ import copy

 from aider.coders.base_coder import Coder
 from aider.coders.base_prompts import CoderPrompts
 """Perform a coding task on multiple files in batches that fit the context and output token limits, without sending them all at once."""
-class IterateCoder(Coder):
+class BatchCoder(Coder):
     coder : Coder = None
     original_kwargs: dict = None
-    edit_format = "iterate"
+    edit_format = "batch"

     def __init__(self, main_model, io, **kwargs):
         super().__init__(main_model, io,**kwargs)
diff --git a/aider/commands.py b/aider/commands.py
index b303644f3..546541f81 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -1064,9 +1064,9 @@ class Commands:
             map_mul_no_files=map_mul_no_files,
             show_announcements=False,
         )
-    def cmd_iterate(self, args):
-        """Iteratively perform the requested change on files, in batches that fit the context and output limits"""
-        return self._generic_chat_command(args, "iterate")
+    def cmd_batch(self, args):
+        """Iteratively perform the requested change on files, in batches that fit the context and output limits"""
+        return self._generic_chat_command(args, "batch")
     def cmd_ask(self, args):
         """Ask questions about the code base without editing any files. If no prompt provided, switches to ask mode.""" # noqa
         return self._generic_chat_command(args, "ask")
diff --git a/tests/basic/test_iterate.py b/tests/basic/test_batch.py
similarity index 93%
rename from tests/basic/test_iterate.py
rename to tests/basic/test_batch.py
index 07e514811..8d7fa7531 100644
--- a/tests/basic/test_iterate.py
+++ b/tests/basic/test_batch.py
@@ -9,7 +9,7 @@ from aider.repo import GitRepo
 from aider.utils import GitTemporaryDirectory


-class TestIterateCoder(unittest.TestCase):
+class TestBatchCoder(unittest.TestCase):
     def setUp(self):
         self.GPT35 = Model("gpt-3.5-turbo")
         self.io = InputOutput(yes=True)
@@ -25,15 +25,15 @@ class TestIterateCoder(unittest.TestCase):
             main_model=self.GPT35,
             io=self.io,
             fnames=self.files,
-            edit_format='iterate'
+            edit_format='batch'
         )

     def tearDown(self):
         # self.webbrowser_patcher.stop()
         return
     """Tests that:
-    - Every request retains the chat history up to the /iterate command, but not the history of other iterations.
-    - Added files and the chat history up to the /iterate command are unmodified.
+    - Every request retains the chat history up to the /batch command, but not the history of other iterations.
+    - Added files and the chat history up to the /batch command are unmodified.
     - Every file is processed (even if a single file exceeds the limits by itself), with no duplicate processing.
     """
     def test_iterate_resets_history_and_processes_all_files(self):
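
Note for reviewers (an illustration, not part of the patches): the core of this series is the greedy batching in file_cruncher.__next__ as it stands after patch 2. The sketch below restates that idea on plain data, under the same assumptions the patch makes (precomputed token counts, a flat PADDING overhead per file, and edited output costing about half the input tokens). The helper name crunch and all numbers here are made up for the example.

    from typing import Iterator, List, Tuple

    PADDING = 50  # flat per-file overhead for fences/headers, mirroring the patch

    def crunch(
        files: List[Tuple[str, int]],  # (filename, token_count) pairs
        max_context: int,              # model input-token limit
        max_output: int,               # model output-token limit
        fixed_tokens: int,             # repo map + chat history resent with every batch
    ) -> Iterator[List[str]]:
        # Smallest files first, so each batch packs as many whole files as possible.
        remaining = sorted(files, key=lambda f: f[1])
        while remaining:
            batch: List[str] = []
            ctx = out = 0
            for name, length in remaining:
                # A file costs its own tokens on input; the edited copy echoed back
                # is assumed to cost about half that again on output.
                if (ctx + length + length // 2 + PADDING + fixed_tokens >= max_context
                        or out + length // 2 + PADDING >= max_output):
                    break
                ctx += length + length // 2 + PADDING
                out += length // 2 + PADDING
                batch.append(name)
            if not batch:
                # No file fits the limits: send the smallest one alone,
                # as the patch does, and let the model deal with it.
                batch.append(remaining[0][0])
            remaining = remaining[len(batch):]
            yield batch

For example, crunch([("a.py", 1000), ("b.py", 120000)], 128000, 16000, 2000) yields ["a.py"] and then ["b.py"]: the second file exceeds the context and output estimates on its own, so it is sent alone in its own request.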