From 62ed9d46219fa4c53538932f17ce80f75e9dff29 Mon Sep 17 00:00:00 2001
From: "Amar Sood (tekacs)" <pkg@tekacs.com>
Date: Thu, 10 Apr 2025 06:26:44 -0400
Subject: [PATCH] Optimizations for when large numbers of files are added

---
 aider/commands.py | 70 +++++++++++++++++++++++++++++++++--------------
 aider/io.py       | 29 +++++++++++++++++++-
 aider/utils.py    | 13 ++++++++-
 3 files changed, 89 insertions(+), 23 deletions(-)

diff --git a/aider/commands.py b/aider/commands.py
index 81fc80093..e295c018e 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -436,6 +436,11 @@ class Commands:
 
         self.coder.choose_fence()
 
+        # Show progress indicator
+        total_files = len(self.coder.abs_fnames) + len(self.coder.abs_read_only_fnames)
+        if total_files > 20:
+            self.io.tool_output(f"Calculating tokens for {total_files} files...")
+
         # system messages
         main_sys = self.coder.fmt_system_prompt(self.coder.gpt_prompts.main_system)
         main_sys += "\n" + self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder)
@@ -467,27 +472,50 @@ class Commands:
         fence = "`" * 3
 
         file_res = []
-        # files
-        for fname in self.coder.abs_fnames:
-            relative_fname = self.coder.get_rel_fname(fname)
-            content = self.io.read_text(fname)
-            if is_image_file(relative_fname):
-                tokens = self.coder.main_model.token_count_for_image(fname)
-            else:
-                # approximate
-                content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n"
-                tokens = self.coder.main_model.token_count(content)
-            file_res.append((tokens, f"{relative_fname}", "/drop to remove"))
+        # Process files with progress indication
+        total_editable_files = len(self.coder.abs_fnames)
+        total_readonly_files = len(self.coder.abs_read_only_fnames)
+        
+        # Display progress for editable files
+        if total_editable_files > 0:
+            if total_editable_files > 20:
+                self.io.tool_output(f"Calculating tokens for {total_editable_files} editable files...")
+            
+            # Calculate tokens for editable files
+            for i, fname in enumerate(self.coder.abs_fnames):
+                if i > 0 and i % 20 == 0 and total_editable_files > 20:
+                    self.io.tool_output(f"Processed {i}/{total_editable_files} editable files...")
+                
+                relative_fname = self.coder.get_rel_fname(fname)
+                content = self.io.read_text(fname)
+                if is_image_file(relative_fname):
+                    tokens = self.coder.main_model.token_count_for_image(fname)
+                else:
+                    # approximate
+                    content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n"
+                    tokens = self.coder.main_model.token_count(content)
+                file_res.append((tokens, f"{relative_fname}", "/drop to remove"))
+        
+        # Display progress for read-only files
+        if total_readonly_files > 0:
+            if total_readonly_files > 20:
+                self.io.tool_output(f"Calculating tokens for {total_readonly_files} read-only files...")
+            
+            # Calculate tokens for read-only files
+            for i, fname in enumerate(self.coder.abs_read_only_fnames):
+                if i > 0 and i % 20 == 0 and total_readonly_files > 20:
+                    self.io.tool_output(f"Processed {i}/{total_readonly_files} read-only files...")
+                
+                relative_fname = self.coder.get_rel_fname(fname)
+                content = self.io.read_text(fname)
+                if content is not None and not is_image_file(relative_fname):
+                    # approximate
+                    content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n"
+                    tokens = self.coder.main_model.token_count(content)
+                    file_res.append((tokens, f"{relative_fname} (read-only)", "/drop to remove"))
 
-        # read-only files
-        for fname in self.coder.abs_read_only_fnames:
-            relative_fname = self.coder.get_rel_fname(fname)
-            content = self.io.read_text(fname)
-            if content is not None and not is_image_file(relative_fname):
-                # approximate
-                content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n"
-                tokens = self.coder.main_model.token_count(content)
-                file_res.append((tokens, f"{relative_fname} (read-only)", "/drop to remove"))
+        if total_files > 20:
+            self.io.tool_output("Token calculation complete. Generating report...")
 
         file_res.sort()
         res.extend(file_res)
@@ -503,7 +531,7 @@ class Commands:
         def fmt(v):
             return format(int(v), ",").rjust(width)
 
-        col_width = max(len(row[1]) for row in res)
+        col_width = max(len(row[1]) for row in res) if res else 0
 
         cost_pad = " " * cost_width
         total = 0
diff --git a/aider/io.py b/aider/io.py
index 90f581aab..2ef4cf7c0 100644
--- a/aider/io.py
+++ b/aider/io.py
@@ -128,8 +128,20 @@ class AutoCompleter(Completer):
         if self.tokenized:
             return
         self.tokenized = True
+        
+        # Performance optimization for large file sets
+        if len(self.all_fnames) > 100:
+            # Skip tokenization for very large numbers of files to avoid input lag
+            self.tokenized = True
+            return
+            
+        # Limit number of files to process to avoid excessive tokenization time
+        process_fnames = self.all_fnames
+        if len(process_fnames) > 50:
+            # Only process a subset of files to maintain responsiveness
+            process_fnames = process_fnames[:50]
 
-        for fname in self.all_fnames:
+        for fname in process_fnames:
             try:
                 with open(fname, "r", encoding=self.encoding) as f:
                     content = f.read()
@@ -1120,6 +1132,21 @@ class InputOutput:
                 self.chat_history_file = None  # Disable further attempts to write
 
     def format_files_for_input(self, rel_fnames, rel_read_only_fnames):
+        # Optimization for large number of files
+        total_files = len(rel_fnames) + len(rel_read_only_fnames or [])
+        
+        # For very large numbers of files, use a summary display
+        if total_files > 50:
+            read_only_count = len(rel_read_only_fnames or [])
+            editable_count = len([f for f in rel_fnames if f not in (rel_read_only_fnames or [])])
+            
+            summary = f"{editable_count} editable file(s)"
+            if read_only_count > 0:
+                summary += f", {read_only_count} read-only file(s)"
+            summary += " (use /ls to list all files)\n"
+            return summary
+            
+        # Original implementation for reasonable number of files
         if not self.pretty:
             read_only_files = []
             for full_path in sorted(rel_read_only_fnames or []):
diff --git a/aider/utils.py b/aider/utils.py
index c6773f140..d58e817df 100644
--- a/aider/utils.py
+++ b/aider/utils.py
@@ -126,7 +126,18 @@ def format_messages(messages, title=None):
                 else:
                     output.append(f"{role} {item}")
         elif isinstance(content, str):  # Handle string content
-            output.append(format_content(role, content))
+            # For large content, especially with many files, use a truncated display approach
+            if len(content) > 5000:
+                # Count the number of code blocks (approximation)
+                fence_count = content.count("```") // 2
+                if fence_count > 5:
+                    # Show only the first line and an approximate file count to improve performance
+                    first_line = content.split("\n", 1)[0]
+                    output.append(f"{role} {first_line} [content with ~{fence_count} files truncated]")
+                else:
+                    output.append(format_content(role, content))
+            else:
+                output.append(format_content(role, content))
         function_call = msg.get("function_call")
         if function_call:
             output.append(f"{role} Function Call: {function_call}")