Bring accurate enhanced context token counts to /tokens

This commit is contained in:
Amar Sood (tekacs) 2025-04-14 08:34:21 -04:00
parent b155143845
commit b8f7757435
2 changed files with 170 additions and 29 deletions

View file

@ -93,8 +93,100 @@ class NavigatorCoder(Coder):
# Enable enhanced context blocks by default
self.use_enhanced_context = True
# Initialize empty token tracking dictionary and cache structures
# but don't populate yet to avoid startup delay
self.context_block_tokens = {}
self.context_blocks_cache = {}
self.tokens_calculated = False
super().__init__(*args, **kwargs)
def _calculate_context_block_tokens(self, force=False):
"""
Calculate token counts for all enhanced context blocks.
This is the central method for calculating token counts,
ensuring they're consistent across all parts of the code.
This method populates the cache for context blocks and calculates tokens.
Args:
force: If True, recalculate tokens even if already calculated
"""
# Skip if already calculated and not forced
if hasattr(self, 'tokens_calculated') and self.tokens_calculated and not force:
return
# Clear existing token counts
self.context_block_tokens = {}
# Initialize the cache for context blocks if needed
if not hasattr(self, 'context_blocks_cache'):
self.context_blocks_cache = {}
if not self.use_enhanced_context:
return
try:
# First, clear the cache to force regeneration of all blocks
self.context_blocks_cache = {}
# Generate all context blocks and calculate token counts
block_types = ["environment_info", "directory_structure", "git_status", "symbol_outline"]
for block_type in block_types:
block_content = self._generate_context_block(block_type)
if block_content:
self.context_block_tokens[block_type] = self.main_model.token_count(block_content)
# Mark as calculated
self.tokens_calculated = True
except Exception as e:
# Silently handle errors during calculation
# This prevents errors in token counting from breaking the main functionality
pass
def _generate_context_block(self, block_name):
"""
Generate a specific context block and cache it.
This is a helper method for get_cached_context_block.
"""
content = None
if block_name == "environment_info":
content = self.get_environment_info()
elif block_name == "directory_structure":
content = self.get_directory_structure()
elif block_name == "git_status":
content = self.get_git_status()
elif block_name == "symbol_outline":
content = self.get_context_symbol_outline()
elif block_name == "context_summary":
content = self.get_context_summary()
# Cache the result if it's not None
if content is not None:
self.context_blocks_cache[block_name] = content
return content
def get_cached_context_block(self, block_name):
    """
    Fetch a context block, preferring the cache over regeneration.

    Intended for format_chat_chunks so blocks are not rebuilt on every
    call; also triggers the initial token calculation (which fills the
    cache) if it has not happened yet.
    """
    # Lazily run the first token calculation when needed.
    if not getattr(self, 'tokens_calculated', False):
        self._calculate_context_block_tokens()

    # Serve the cached copy when one exists.
    cached = getattr(self, 'context_blocks_cache', {})
    if block_name in cached:
        return cached[block_name]

    # Cache miss: build (and cache) the block now.
    return self._generate_context_block(block_name)
def set_granular_editing(self, enabled):
"""
Switch between granular editing tools and legacy search/replace.
@ -196,13 +288,19 @@ class NavigatorCoder(Coder):
# If enhanced context blocks are not enabled, just return the base chunks
if not self.use_enhanced_context:
return chunks
# Make sure token counts are updated - using centralized method
# This also populates the context block cache
self._calculate_context_block_tokens()
# Generate all context blocks
env_context = self.get_environment_info()
# Get blocks from cache to avoid regenerating them
env_context = self.get_cached_context_block("environment_info")
dir_structure = self.get_cached_context_block("directory_structure")
git_status = self.get_cached_context_block("git_status")
symbol_outline = self.get_cached_context_block("symbol_outline")
# Context summary needs special handling because it depends on other blocks
context_summary = self.get_context_summary()
dir_structure = self.get_directory_structure()
git_status = self.get_git_status()
symbol_outline = self.get_context_symbol_outline()
# 1. Add relatively static blocks BEFORE done_messages
# These blocks change less frequently and can be part of the cacheable prefix
@ -308,13 +406,20 @@ class NavigatorCoder(Coder):
chunks.cur = list(self.cur_messages)
chunks.reminder = []
# TODO review impact of token count on image messages
messages_tokens = self.main_model.token_count(chunks.all_messages())
# Use accurate token counting method that considers enhanced context blocks
base_messages = chunks.all_messages()
messages_tokens = self.main_model.token_count(base_messages)
reminder_tokens = self.main_model.token_count(reminder_message)
cur_tokens = self.main_model.token_count(chunks.cur)
if None not in (messages_tokens, reminder_tokens, cur_tokens):
total_tokens = messages_tokens + reminder_tokens + cur_tokens
total_tokens = messages_tokens
# Only add tokens for reminder and cur if they're not already included
# in the messages_tokens calculation
if not chunks.reminder:
total_tokens += reminder_tokens
if not chunks.cur:
total_tokens += cur_tokens
else:
# add the reminder anyway
total_tokens = 0
@ -351,7 +456,16 @@ class NavigatorCoder(Coder):
"""
if not self.use_enhanced_context:
return None
# If context_summary is already in the cache, return it
if hasattr(self, 'context_blocks_cache') and "context_summary" in self.context_blocks_cache:
return self.context_blocks_cache["context_summary"]
try:
# Make sure token counts are updated before generating the summary
if not hasattr(self, 'context_block_tokens') or not self.context_block_tokens:
self._calculate_context_block_tokens()
result = "<context name=\"context_summary\">\n"
result += "## Current Context Overview\n\n"
max_input_tokens = self.main_model.info.get("max_input_tokens") or 0
@ -401,27 +515,8 @@ class NavigatorCoder(Coder):
else:
result += "No read-only files in context\n\n"
# Additional enhanced context blocks
env_info = self.get_environment_info()
dir_structure = self.get_directory_structure()
git_status = self.get_git_status()
symbol_outline = self.get_context_symbol_outline()
extra_context = ""
extra_tokens = 0
if env_info:
extra_context += env_info + "\n\n"
extra_tokens += self.main_model.token_count(env_info)
if dir_structure:
extra_context += dir_structure + "\n\n"
extra_tokens += self.main_model.token_count(dir_structure)
if git_status:
extra_context += git_status + "\n\n"
extra_tokens += self.main_model.token_count(git_status)
if symbol_outline:
extra_context += symbol_outline + "\n\n"
extra_tokens += self.main_model.token_count(symbol_outline)
# Use the pre-calculated context block tokens
extra_tokens = sum(self.context_block_tokens.values())
total_tokens = total_file_tokens + extra_tokens
result += f"**Total files usage: {total_file_tokens:,} tokens**\n\n"
@ -435,6 +530,12 @@ class NavigatorCoder(Coder):
result += "- `[tool_call(Remove, file_path=\"path/to/large_file.ext\")]`\n"
result += "- Keep only essential files in context for best performance"
result += "\n</context>"
# Cache the result
if not hasattr(self, 'context_blocks_cache'):
self.context_blocks_cache = {}
self.context_blocks_cache["context_summary"] = result
return result
except Exception as e:
self.io.tool_error(f"Error generating context summary: {str(e)}")
@ -1647,8 +1748,15 @@ Just reply with fixed versions of the {blocks} above that failed to match.
if self.use_enhanced_context:
self.io.tool_output("Enhanced context blocks are now ON - directory structure and git status will be included.")
# Mark tokens as needing calculation, but don't calculate yet (lazy calculation)
self.tokens_calculated = False
self.context_blocks_cache = {}
else:
self.io.tool_output("Enhanced context blocks are now OFF - directory structure and git status will not be included.")
# Clear token counts and cache when disabled
self.context_block_tokens = {}
self.context_blocks_cache = {}
self.tokens_calculated = False
return True

View file

@ -464,6 +464,20 @@ class Commands:
tokens = self.coder.main_model.token_count(repo_content)
res.append((tokens, "repository map", "use --map-tokens to resize"))
# Enhanced context blocks (only for navigator mode)
if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
# Force token calculation if it hasn't been done yet
if hasattr(self.coder, '_calculate_context_block_tokens'):
if not hasattr(self.coder, 'tokens_calculated') or not self.coder.tokens_calculated:
self.coder._calculate_context_block_tokens()
# Add enhanced context blocks to the display
if hasattr(self.coder, 'context_block_tokens') and self.coder.context_block_tokens:
for block_name, tokens in self.coder.context_block_tokens.items():
# Format the block name more nicely
display_name = block_name.replace('_', ' ').title()
res.append((tokens, f"{display_name} context block", "/context-blocks to toggle"))
fence = "`" * 3
file_res = []
@ -872,6 +886,11 @@ class Commands:
fname = self.coder.get_rel_fname(abs_file_path)
self.io.tool_output(f"Added {fname} to the chat")
self.coder.check_added_files()
# Recalculate context block tokens if using navigator mode
if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
if hasattr(self.coder, '_calculate_context_block_tokens'):
self.coder._calculate_context_block_tokens()
def completions_drop(self):
files = self.coder.get_inchat_relative_files()
@ -891,9 +910,16 @@ class Commands:
else:
self.io.tool_output("Dropping all files from the chat session.")
self._drop_all_files()
# Recalculate context block tokens after dropping all files
if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
if hasattr(self.coder, '_calculate_context_block_tokens'):
self.coder._calculate_context_block_tokens()
return
filenames = parse_quoted_filenames(args)
files_changed = False
for word in filenames:
# Expand tilde in the path
expanded_word = os.path.expanduser(word)
@ -916,6 +942,7 @@ class Commands:
for matched_file in read_only_matched:
self.coder.abs_read_only_fnames.remove(matched_file)
self.io.tool_output(f"Removed read-only file {matched_file} from the chat")
files_changed = True
# For editable files, use glob if word contains glob chars, otherwise use substring
if any(c in expanded_word for c in "*?[]"):
@ -934,6 +961,12 @@ class Commands:
if abs_fname in self.coder.abs_fnames:
self.coder.abs_fnames.remove(abs_fname)
self.io.tool_output(f"Removed {matched_file} from the chat")
files_changed = True
# Recalculate context block tokens if any files were changed and using navigator mode
if files_changed and hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
if hasattr(self.coder, '_calculate_context_block_tokens'):
self.coder._calculate_context_block_tokens()
def cmd_git(self, args):
"Run a git command (output excluded from chat)"