feat: post-completion batch linting and committing for /iterate command and refactor

This commit is contained in:
yozerpp 2025-01-21 17:45:13 +03:00
parent 68bd8884f7
commit 7e232c5823
2 changed files with 70 additions and 35 deletions

View file

@ -1,7 +1,8 @@
from typing import Tuple from typing import Tuple, override
import copy import copy
from aider.coders.base_coder import Coder from aider.coders.base_coder import Coder
from aider.coders.base_prompts import CoderPrompts
"""Perform a coding task on multiple files in batches that fit the context and outpot token limits, without sending them all at once.""" """Perform a coding task on multiple files in batches that fit the context and outpot token limits, without sending them all at once."""
class IterateCoder(Coder): class IterateCoder(Coder):
coder : Coder = None coder : Coder = None
@ -10,65 +11,100 @@ class IterateCoder(Coder):
def __init__(self, main_model, io, **kwargs):
    """Initialize like a regular Coder.

    Falls back to the plain base `CoderPrompts` when the caller did not
    supply `gpt_prompts` — this coder only orchestrates batches and
    delegates the actual editing to an inner coder.
    """
    super().__init__(main_model, io, **kwargs)
    if 'gpt_prompts' not in kwargs:
        self.gpt_prompts = CoderPrompts()
@override
def run_one(self, user_message, preproc):
    """Run `user_message` over all chat files in context-sized batches.

    Lint and commit are suppressed on the inner coder and performed once,
    in batches, after every file has been processed.
    """
    if self.coder is None:
        self.coder = Coder.create(main_model=self.main_model, edit_format=self.main_model.edit_format, from_coder=self, **self.original_kwargs)
        # Defer linting/committing until all batches are done (see below).
        self.coder.auto_lint, self.coder.auto_commits = (False, False)
    chat_files_with_type_and_length = self.get_chat_files_with_type_and_length()
    max_tokens = self.main_model.info.get('max_tokens')
    max_context = self.main_model.info['max_input_tokens']
    max_output = max_tokens if max_tokens is not None else self.main_model.info['max_output_tokens']
    repo_token_count = self.main_model.get_repo_map_tokens()
    history_token_count = sum([tup[0] for tup in self.summarizer.tokenize([msg["content"] for msg in self.done_messages])])
    # Shell commands will still need confirmation for each command; this can be
    # overridden by extending InputOutput and overriding confirm_ask.
    prev_io = self.io.yes
    self.io.yes = True
    cruncher = self.file_cruncher(max_context, max_output, repo_token_count + history_token_count,
                                  chat_files_with_type_and_length)
    edited_files = self.batch_process(user_message, preproc, cruncher)
    self.io.yes = prev_io
    if len(edited_files) == 0:
        return
    if self.auto_lint:
        # Re-crunch only the edited files for batched linting.
        cruncher.files_to_crunch = [(fname, True, self.main_model.token_count(self.io.read_text(fname))) for fname in edited_files]
        self.batch_lint(cruncher, preproc)
    if self.auto_commits:
        self.batch_commit(edited_files)
def get_chat_files_with_type_and_length(self):
    """Return (path, is_editable, token_count) for every file in the chat.

    Editable files (`abs_fnames`) come first with True, read-only files
    (`abs_read_only_fnames`) follow with False.
    """
    chat_files_with_type_and_length: list[Tuple[str, bool, int]] = []
    for f in self.abs_fnames:
        chat_files_with_type_and_length.append((f, True, self.main_model.token_count(self.io.read_text(f))))
    for f in self.abs_read_only_fnames:
        chat_files_with_type_and_length.append((f, False, self.main_model.token_count(self.io.read_text(f))))
    return chat_files_with_type_and_length
def batch_process(self, message, preproc, cruncher):
    """Run the inner coder once per batch yielded by `cruncher`.

    Returns the list of files the inner coder edited across all batches.
    """
    edited_files = []
    for files_to_send_with_types in cruncher:
        self.prepare_batch(files_to_send_with_types)
        self.coder.run_one(message, preproc)
        edited_files.extend(self.coder.aider_edited_files)
        # Reset so each batch reports only its own edits.
        self.coder.aider_edited_files = set()
    return edited_files
def prepare_batch(self, files_to_send_with_types: list[Tuple[str, bool]]):
    """Point the inner coder at one batch of files.

    History is reset to a deep copy of the state at the start of the
    /iterate command so batches do not see each other's conversation.
    """
    self.coder.done_messages = copy.deepcopy(self.done_messages)
    self.coder.cur_messages = []
    self.coder.abs_fnames = set(f[0] for f in files_to_send_with_types if f[1])
    self.coder.abs_read_only_fnames = set(f[0] for f in files_to_send_with_types if not f[1])
def batch_lint(self, cruncher, preproc):
    """Lint the edited files batch-by-batch, offering to auto-fix errors.

    Each batch is committed right after linting so a later failed fix
    cannot lose earlier work.
    """
    for files_with_type in cruncher:
        files = [ft[0] for ft in files_with_type]
        lint_msg = self.coder.lint_edited(files)
        self.auto_commit(files, context="Ran the linter")
        if lint_msg:
            ok = self.io.confirm_ask("Attempt to fix lint errors?", subject="batch_lint", allow_never=True)
            if ok:
                # Fresh conversation: the lint fix needs no prior history.
                self.coder.done_messages, self.coder.cur_messages = ([], [])
                self.coder.run_one(lint_msg, preproc)
def batch_commit(self, files: list[str]):
    """Commit every file edited during this /iterate run in one commit."""
    self.repo.commit(files)
class file_cruncher:
    """Iterator yielding batches of (file_name, is_editable) pairs.

    Each batch is sized so that the input files + chat history + repo map
    fit the context limit and the predicted output fits the output limit.
    Output files are assumed to be half the size of the input files.
    """
    context_tokens: int        # tokens already consumed by history + repo map
    max_context: int
    max_output: int
    files_to_crunch: list[Tuple[str, bool, int]]
    PADDING: int = 50          # per-file overhead (fences, headers, ...)

    def __init__(self, max_context: int, max_output: int, context_tokens, files_to_crunch: list[Tuple[str, bool, int]]):
        self.context_tokens = context_tokens
        self.max_context = max_context
        self.max_output = max_output
        # Smallest files first, so each batch packs as many files as possible.
        self.files_to_crunch = sorted(files_to_crunch, key=lambda x: x[2])

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next batch of files that fits both token limits."""
        if len(self.files_to_crunch) == 0:
            raise StopIteration
        files_to_send: list[Tuple[str, bool]] = []
        i: int = 0
        total_context = 0
        total_output = 0
        for file_name, type_, length in self.files_to_crunch:
            # Predicted output is half the input size; use the SAME estimate
            # in the check and in the accumulators (they previously disagreed:
            # the check used length/2 while the totals added the full length).
            output_estimate = length / 2
            if length + output_estimate + self.context_tokens + total_context >= self.max_context \
                    or output_estimate + total_output >= self.max_output:
                break
            total_context += length + output_estimate + self.PADDING
            total_output += output_estimate + self.PADDING
            files_to_send.append((file_name, type_))
            i += 1
        if i == 0:  # no file fits the limits; send the smallest anyway and let the model cope
            f, t, _ = self.files_to_crunch[i]
            files_to_send.append((copy.copy(f), t))
            i = 1
        self.files_to_crunch = self.files_to_crunch[i:]
        return files_to_send

View file

@ -5,6 +5,7 @@ from unittest.mock import MagicMock, patch
from aider.coders import Coder from aider.coders import Coder
from aider.io import InputOutput from aider.io import InputOutput
from aider.models import Model from aider.models import Model
from aider.repo import GitRepo
from aider.utils import GitTemporaryDirectory from aider.utils import GitTemporaryDirectory
@ -47,7 +48,6 @@ class TestIterateCoder(unittest.TestCase):
for original_message in original_context: for original_message in original_context:
assert original_message in messages, f"Chat history before start of the command is not retained." assert original_message in messages, f"Chat history before start of the command is not retained."
# Simulate response mentioning filename # Simulate response mentioning filename
a : str=""
files_message = [msg['content'] for msg in messages if "*added these files to the chat*" in msg['content']][0] files_message = [msg['content'] for msg in messages if "*added these files to the chat*" in msg['content']][0]
from re import findall from re import findall
file_names = findall(r'.*\n(\S+\.py)\n```.*',files_message) file_names = findall(r'.*\n(\S+\.py)\n```.*',files_message)
@ -61,9 +61,8 @@ class TestIterateCoder(unittest.TestCase):
with GitTemporaryDirectory(): with GitTemporaryDirectory():
# Mock the send method # Mock the send method
with patch.object(Coder, 'send',new_callable=lambda: mock_send): with (patch.object(Coder, 'send',new_callable=lambda: mock_send), patch.object(Coder, 'lint_edited',lambda *_,**__:None), patch.object(GitRepo,'commit',lambda *_,**__:None)):
self.coder.coder = Coder.create(main_model=self.coder.main_model, edit_format=self.coder.main_model.edit_format,from_coder=self.coder,**self.coder.original_kwargs) self.coder.coder = Coder.create(main_model=self.coder.main_model, edit_format=self.coder.main_model.edit_format,from_coder=self.coder,**self.coder.original_kwargs)
# Add initial conversation history # Add initial conversation history
original_context = self.coder.done_messages = [ original_context = self.coder.done_messages = [
{"role": "user", "content": "Initial conversation"}, {"role": "user", "content": "Initial conversation"},