From 62ed9d46219fa4c53538932f17ce80f75e9dff29 Mon Sep 17 00:00:00 2001 From: "Amar Sood (tekacs)" Date: Thu, 10 Apr 2025 06:26:44 -0400 Subject: [PATCH] Optimizations for when large numbers of files are added --- aider/commands.py | 70 +++++++++++++++++++++++++++++++++-------------- aider/io.py | 29 +++++++++++++++++++- aider/utils.py | 13 ++++++++- 3 files changed, 89 insertions(+), 23 deletions(-) diff --git a/aider/commands.py b/aider/commands.py index 81fc80093..e295c018e 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -436,6 +436,11 @@ class Commands: self.coder.choose_fence() + # Show progress indicator + total_files = len(self.coder.abs_fnames) + len(self.coder.abs_read_only_fnames) + if total_files > 20: + self.io.tool_output(f"Calculating tokens for {total_files} files...") + # system messages main_sys = self.coder.fmt_system_prompt(self.coder.gpt_prompts.main_system) main_sys += "\n" + self.coder.fmt_system_prompt(self.coder.gpt_prompts.system_reminder) @@ -467,27 +472,50 @@ class Commands: fence = "`" * 3 file_res = [] - # files - for fname in self.coder.abs_fnames: - relative_fname = self.coder.get_rel_fname(fname) - content = self.io.read_text(fname) - if is_image_file(relative_fname): - tokens = self.coder.main_model.token_count_for_image(fname) - else: - # approximate - content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n" - tokens = self.coder.main_model.token_count(content) - file_res.append((tokens, f"{relative_fname}", "/drop to remove")) + # Process files with progress indication + total_editable_files = len(self.coder.abs_fnames) + total_readonly_files = len(self.coder.abs_read_only_fnames) + + # Display progress for editable files + if total_editable_files > 0: + if total_editable_files > 20: + self.io.tool_output(f"Calculating tokens for {total_editable_files} editable files...") + + # Calculate tokens for editable files + for i, fname in enumerate(self.coder.abs_fnames): + if i > 0 and i % 20 == 0 and total_editable_files > 20: + self.io.tool_output(f"Processed {i}/{total_editable_files} editable files...") + + relative_fname = self.coder.get_rel_fname(fname) + content = self.io.read_text(fname) + if is_image_file(relative_fname): + tokens = self.coder.main_model.token_count_for_image(fname) + else: + # approximate + content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n" + tokens = self.coder.main_model.token_count(content) + file_res.append((tokens, f"{relative_fname}", "/drop to remove")) + + # Display progress for read-only files + if total_readonly_files > 0: + if total_readonly_files > 20: + self.io.tool_output(f"Calculating tokens for {total_readonly_files} read-only files...") + + # Calculate tokens for read-only files + for i, fname in enumerate(self.coder.abs_read_only_fnames): + if i > 0 and i % 20 == 0 and total_readonly_files > 20: + self.io.tool_output(f"Processed {i}/{total_readonly_files} read-only files...") + + relative_fname = self.coder.get_rel_fname(fname) + content = self.io.read_text(fname) + if content is not None and not is_image_file(relative_fname): + # approximate + content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n" + tokens = self.coder.main_model.token_count(content) + file_res.append((tokens, f"{relative_fname} (read-only)", "/drop to remove")) - # read-only files - for fname in self.coder.abs_read_only_fnames: - relative_fname = self.coder.get_rel_fname(fname) - content = self.io.read_text(fname) - if content is not None and not is_image_file(relative_fname): - # approximate - content = f"{relative_fname}\n{fence}\n" + content + "{fence}\n" - tokens = self.coder.main_model.token_count(content) - file_res.append((tokens, f"{relative_fname} (read-only)", "/drop to remove")) + if total_files > 20: + self.io.tool_output("Token calculation complete. Generating report...") file_res.sort() res.extend(file_res) @@ -503,7 +531,7 @@ class Commands: def fmt(v): return format(int(v), ",").rjust(width) - col_width = max(len(row[1]) for row in res) + col_width = max(len(row[1]) for row in res) if res else 0 cost_pad = " " * cost_width total = 0 diff --git a/aider/io.py b/aider/io.py index 90f581aab..2ef4cf7c0 100644 --- a/aider/io.py +++ b/aider/io.py @@ -128,8 +128,20 @@ class AutoCompleter(Completer): if self.tokenized: return self.tokenized = True + + # Performance optimization for large file sets + if len(self.all_fnames) > 100: + # Skip tokenization for very large numbers of files to avoid input lag + self.tokenized = True + return + + # Limit number of files to process to avoid excessive tokenization time + process_fnames = self.all_fnames + if len(process_fnames) > 50: + # Only process a subset of files to maintain responsiveness + process_fnames = process_fnames[:50] - for fname in self.all_fnames: + for fname in process_fnames: try: with open(fname, "r", encoding=self.encoding) as f: content = f.read() @@ -1120,6 +1132,21 @@ class InputOutput: self.chat_history_file = None # Disable further attempts to write def format_files_for_input(self, rel_fnames, rel_read_only_fnames): + # Optimization for large number of files + total_files = len(rel_fnames) + len(rel_read_only_fnames or []) + + # For very large numbers of files, use a summary display + if total_files > 50: + read_only_count = len(rel_read_only_fnames or []) + editable_count = len([f for f in rel_fnames if f not in (rel_read_only_fnames or [])]) + + summary = f"{editable_count} editable file(s)" + if read_only_count > 0: + summary += f", {read_only_count} read-only file(s)" + summary += " (use /ls to list all files)\n" + return summary + + # Original implementation for reasonable number of files if not self.pretty: read_only_files = [] for full_path in sorted(rel_read_only_fnames or []): diff --git a/aider/utils.py b/aider/utils.py index c6773f140..d58e817df 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -126,7 +126,18 @@ def format_messages(messages, title=None): else: output.append(f"{role} {item}") elif isinstance(content, str): # Handle string content - output.append(format_content(role, content)) + # For large content, especially with many files, use a truncated display approach + if len(content) > 5000: + # Count the number of code blocks (approximation) + fence_count = content.count("```") // 2 + if fence_count > 5: + # Show truncated content with file count for large files to improve performance + first_line = content.split("\n", 1)[0] + output.append(f"{role} {first_line} [content with ~{fence_count} files truncated]") + else: + output.append(format_content(role, content)) + else: + output.append(format_content(role, content)) function_call = msg.get("function_call") if function_call: output.append(f"{role} Function Call: {function_call}")