From b8f775743592f414ae4d96355f3cd04b5278c933 Mon Sep 17 00:00:00 2001 From: "Amar Sood (tekacs)" Date: Mon, 14 Apr 2025 08:34:21 -0400 Subject: [PATCH] Bring accurate enhanced context token counts to /tokens --- aider/coders/navigator_coder.py | 166 ++++++++++++++++++++++++++------ aider/commands.py | 33 +++++++ 2 files changed, 170 insertions(+), 29 deletions(-) diff --git a/aider/coders/navigator_coder.py b/aider/coders/navigator_coder.py index 9940a7147..8fbcb7284 100644 --- a/aider/coders/navigator_coder.py +++ b/aider/coders/navigator_coder.py @@ -93,8 +93,100 @@ class NavigatorCoder(Coder): # Enable enhanced context blocks by default self.use_enhanced_context = True + # Initialize empty token tracking dictionary and cache structures + # but don't populate yet to avoid startup delay + self.context_block_tokens = {} + self.context_blocks_cache = {} + self.tokens_calculated = False + super().__init__(*args, **kwargs) + def _calculate_context_block_tokens(self, force=False): + """ + Calculate token counts for all enhanced context blocks. + This is the central method for calculating token counts, + ensuring they're consistent across all parts of the code. + + This method populates the cache for context blocks and calculates tokens. + + Args: + force: If True, recalculate tokens even if already calculated + """ + # Skip if already calculated and not forced + if hasattr(self, 'tokens_calculated') and self.tokens_calculated and not force: + return + + # Clear existing token counts + self.context_block_tokens = {} + + # Initialize the cache for context blocks if needed + if not hasattr(self, 'context_blocks_cache'): + self.context_blocks_cache = {} + + if not self.use_enhanced_context: + return + + try: + # First, clear the cache to force regeneration of all blocks + self.context_blocks_cache = {} + + # Generate all context blocks and calculate token counts + block_types = ["environment_info", "directory_structure", "git_status", "symbol_outline"] + + for block_type in block_types: + block_content = self._generate_context_block(block_type) + if block_content: + self.context_block_tokens[block_type] = self.main_model.token_count(block_content) + + # Mark as calculated + self.tokens_calculated = True + except Exception as e: + # Silently handle errors during calculation + # This prevents errors in token counting from breaking the main functionality + pass + + def _generate_context_block(self, block_name): + """ + Generate a specific context block and cache it. + This is a helper method for get_cached_context_block. + """ + content = None + + if block_name == "environment_info": + content = self.get_environment_info() + elif block_name == "directory_structure": + content = self.get_directory_structure() + elif block_name == "git_status": + content = self.get_git_status() + elif block_name == "symbol_outline": + content = self.get_context_symbol_outline() + elif block_name == "context_summary": + content = self.get_context_summary() + + # Cache the result if it's not None + if content is not None: + self.context_blocks_cache[block_name] = content + + return content + + def get_cached_context_block(self, block_name): + """ + Get a context block from the cache, or generate it if not available. + This should be used by format_chat_chunks to avoid regenerating blocks. + + This will ensure tokens are calculated if they haven't been yet. + """ + # Make sure tokens have been calculated at least once + if not hasattr(self, 'tokens_calculated') or not self.tokens_calculated: + self._calculate_context_block_tokens() + + # Return from cache if available + if hasattr(self, 'context_blocks_cache') and block_name in self.context_blocks_cache: + return self.context_blocks_cache[block_name] + + # Otherwise generate and cache the block + return self._generate_context_block(block_name) + def set_granular_editing(self, enabled): """ Switch between granular editing tools and legacy search/replace. @@ -196,13 +288,19 @@ class NavigatorCoder(Coder): # If enhanced context blocks are not enabled, just return the base chunks if not self.use_enhanced_context: return chunks + + # Make sure token counts are updated - using centralized method + # This also populates the context block cache + self._calculate_context_block_tokens() - # Generate all context blocks - env_context = self.get_environment_info() + # Get blocks from cache to avoid regenerating them + env_context = self.get_cached_context_block("environment_info") + dir_structure = self.get_cached_context_block("directory_structure") + git_status = self.get_cached_context_block("git_status") + symbol_outline = self.get_cached_context_block("symbol_outline") + + # Context summary needs special handling because it depends on other blocks context_summary = self.get_context_summary() - dir_structure = self.get_directory_structure() - git_status = self.get_git_status() - symbol_outline = self.get_context_symbol_outline() # 1. Add relatively static blocks BEFORE done_messages # These blocks change less frequently and can be part of the cacheable prefix @@ -308,13 +406,20 @@ class NavigatorCoder(Coder): chunks.cur = list(self.cur_messages) chunks.reminder = [] - # TODO review impact of token count on image messages - messages_tokens = self.main_model.token_count(chunks.all_messages()) + # Use accurate token counting method that considers enhanced context blocks + base_messages = chunks.all_messages() + messages_tokens = self.main_model.token_count(base_messages) reminder_tokens = self.main_model.token_count(reminder_message) cur_tokens = self.main_model.token_count(chunks.cur) if None not in (messages_tokens, reminder_tokens, cur_tokens): - total_tokens = messages_tokens + reminder_tokens + cur_tokens + total_tokens = messages_tokens + # Only add tokens for reminder and cur if they're not already included + # in the messages_tokens calculation + if not chunks.reminder: + total_tokens += reminder_tokens + if not chunks.cur: + total_tokens += cur_tokens else: # add the reminder anyway total_tokens = 0 @@ -351,7 +456,16 @@ class NavigatorCoder(Coder): """ if not self.use_enhanced_context: return None + + # If context_summary is already in the cache, return it + if hasattr(self, 'context_blocks_cache') and "context_summary" in self.context_blocks_cache: + return self.context_blocks_cache["context_summary"] + try: + # Make sure token counts are updated before generating the summary + if not hasattr(self, 'context_block_tokens') or not self.context_block_tokens: + self._calculate_context_block_tokens() + result = "\n" result += "## Current Context Overview\n\n" max_input_tokens = self.main_model.info.get("max_input_tokens") or 0 @@ -401,27 +515,8 @@ class NavigatorCoder(Coder): else: result += "No read-only files in context\n\n" - # Additional enhanced context blocks - env_info = self.get_environment_info() - dir_structure = self.get_directory_structure() - git_status = self.get_git_status() - symbol_outline = self.get_context_symbol_outline() - - extra_context = "" - extra_tokens = 0 - if env_info: - extra_context += env_info + "\n\n" - extra_tokens += self.main_model.token_count(env_info) - if dir_structure: - extra_context += dir_structure + "\n\n" - extra_tokens += self.main_model.token_count(dir_structure) - if git_status: - extra_context += git_status + "\n\n" - extra_tokens += self.main_model.token_count(git_status) - if symbol_outline: - extra_context += symbol_outline + "\n\n" - extra_tokens += self.main_model.token_count(symbol_outline) - + # Use the pre-calculated context block tokens + extra_tokens = sum(self.context_block_tokens.values()) total_tokens = total_file_tokens + extra_tokens result += f"**Total files usage: {total_file_tokens:,} tokens**\n\n" @@ -435,6 +530,12 @@ class NavigatorCoder(Coder): result += "- `[tool_call(Remove, file_path=\"path/to/large_file.ext\")]`\n" result += "- Keep only essential files in context for best performance" result += "\n" + + # Cache the result + if not hasattr(self, 'context_blocks_cache'): + self.context_blocks_cache = {} + self.context_blocks_cache["context_summary"] = result + return result except Exception as e: self.io.tool_error(f"Error generating context summary: {str(e)}") @@ -1647,8 +1748,15 @@ Just reply with fixed versions of the {blocks} above that failed to match. if self.use_enhanced_context: self.io.tool_output("Enhanced context blocks are now ON - directory structure and git status will be included.") + # Mark tokens as needing calculation, but don't calculate yet (lazy calculation) + self.tokens_calculated = False + self.context_blocks_cache = {} else: self.io.tool_output("Enhanced context blocks are now OFF - directory structure and git status will not be included.") + # Clear token counts and cache when disabled + self.context_block_tokens = {} + self.context_blocks_cache = {} + self.tokens_calculated = False return True diff --git a/aider/commands.py b/aider/commands.py index 7bba50a68..1e2ba8879 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -464,6 +464,20 @@ class Commands: tokens = self.coder.main_model.token_count(repo_content) res.append((tokens, "repository map", "use --map-tokens to resize")) + # Enhanced context blocks (only for navigator mode) + if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context: + # Force token calculation if it hasn't been done yet + if hasattr(self.coder, '_calculate_context_block_tokens'): + if not hasattr(self.coder, 'tokens_calculated') or not self.coder.tokens_calculated: + self.coder._calculate_context_block_tokens() + + # Add enhanced context blocks to the display + if hasattr(self.coder, 'context_block_tokens') and self.coder.context_block_tokens: + for block_name, tokens in self.coder.context_block_tokens.items(): + # Format the block name more nicely + display_name = block_name.replace('_', ' ').title() + res.append((tokens, f"{display_name} context block", "/context-blocks to toggle")) + fence = "`" * 3 file_res = [] @@ -872,6 +886,11 @@ class Commands: fname = self.coder.get_rel_fname(abs_file_path) self.io.tool_output(f"Added {fname} to the chat") self.coder.check_added_files() + + # Recalculate context block tokens if using navigator mode + if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context: + if hasattr(self.coder, '_calculate_context_block_tokens'): + self.coder._calculate_context_block_tokens() def completions_drop(self): files = self.coder.get_inchat_relative_files() @@ -891,9 +910,16 @@ class Commands: else: self.io.tool_output("Dropping all files from the chat session.") self._drop_all_files() + + # Recalculate context block tokens after dropping all files + if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context: + if hasattr(self.coder, '_calculate_context_block_tokens'): + self.coder._calculate_context_block_tokens() return filenames = parse_quoted_filenames(args) + files_changed = False + for word in filenames: # Expand tilde in the path expanded_word = os.path.expanduser(word) @@ -916,6 +942,7 @@ class Commands: for matched_file in read_only_matched: self.coder.abs_read_only_fnames.remove(matched_file) self.io.tool_output(f"Removed read-only file {matched_file} from the chat") + files_changed = True # For editable files, use glob if word contains glob chars, otherwise use substring if any(c in expanded_word for c in "*?[]"): @@ -934,6 +961,12 @@ class Commands: if abs_fname in self.coder.abs_fnames: self.coder.abs_fnames.remove(abs_fname) self.io.tool_output(f"Removed {matched_file} from the chat") + files_changed = True + + # Recalculate context block tokens if any files were changed and using navigator mode + if files_changed and hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context: + if hasattr(self.coder, '_calculate_context_block_tokens'): + self.coder._calculate_context_block_tokens() def cmd_git(self, args): "Run a git command (output excluded from chat)"