From 793035b2e09a2292639cf47f8d388b913b3ee408 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Mon, 5 Aug 2024 16:05:19 -0300 Subject: [PATCH] improve perf of repomap and other file intensive actions --- aider/coders/base_coder.py | 4 +++- aider/repo.py | 36 ++++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index b89012006..95d88e425 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -1330,7 +1330,9 @@ class Coder: else: files = self.get_inchat_relative_files() - files = [fname for fname in files if self.is_file_safe(fname)] + # This is quite slow in large repos + # files = [fname for fname in files if self.is_file_safe(fname)] + return sorted(set(files)) def get_all_abs_files(self): diff --git a/aider/repo.py b/aider/repo.py index 40e30ab17..804de8bae 100644 --- a/aider/repo.py +++ b/aider/repo.py @@ -229,6 +229,8 @@ class GitRepo: return diffs + tree_files = {} + def get_tracked_files(self): if not self.repo: return [] @@ -240,25 +242,39 @@ class GitRepo: files = [] if commit: - for blob in commit.tree.traverse(): - if blob.type == "blob": # blob is a file - files.append(blob.path) + if commit in self.tree_files: + files = self.tree_files[commit] + else: + for blob in commit.tree.traverse(): + if blob.type == "blob": # blob is a file + files.append(blob.path) + files = set(self.normalize_path(path) for path in files) + self.tree_files[commit] = set(files) # Add staged files index = self.repo.index staged_files = [path for path, _ in index.entries.keys()] + files.update(self.normalize_path(path) for path in staged_files) - files.extend(staged_files) - - # convert to appropriate os.sep, since git always normalizes to / - res = set(self.normalize_path(path) for path in files) - - res = [fname for fname in res if not self.ignored_file(fname)] + res = [fname for fname in files if not self.ignored_file(fname)] return res + normalized_path = {} + def normalize_path(self, path): - return str(Path(PurePosixPath((Path(self.root) / path).relative_to(self.root)))) + orig_path = path + res = self.normalized_path.get(orig_path) + if res: + return res + + path = Path(self.root) / path + path = PurePosixPath(path) + path = path.relative_to(self.root) + + path = str(path) + self.normalized_path[orig_path] = path + return path def refresh_aider_ignore(self): if not self.aider_ignore_file: