From b8f775743592f414ae4d96355f3cd04b5278c933 Mon Sep 17 00:00:00 2001
From: "Amar Sood (tekacs)" <pkg@tekacs.com>
Date: Mon, 14 Apr 2025 08:34:21 -0400
Subject: [PATCH] Bring accurate enhanced context token counts to /tokens

---
 aider/coders/navigator_coder.py | 166 ++++++++++++++++++++++++++------
 aider/commands.py               |  33 +++++++
 2 files changed, 170 insertions(+), 29 deletions(-)

diff --git a/aider/coders/navigator_coder.py b/aider/coders/navigator_coder.py
index 9940a7147..8fbcb7284 100644
--- a/aider/coders/navigator_coder.py
+++ b/aider/coders/navigator_coder.py
@@ -93,8 +93,100 @@ class NavigatorCoder(Coder):
         # Enable enhanced context blocks by default
         self.use_enhanced_context = True
         
+        # Initialize empty token tracking dictionary and cache structures 
+        # but don't populate yet to avoid startup delay
+        self.context_block_tokens = {}
+        self.context_blocks_cache = {}
+        self.tokens_calculated = False
+        
         super().__init__(*args, **kwargs)
         
+    def _calculate_context_block_tokens(self, force=False):
+        """
+        Calculate token counts for all enhanced context blocks.
+
+        Regenerates each block (refreshing ``self.context_blocks_cache``) and
+        records its token count in ``self.context_block_tokens``.
+
+        Args:
+            force: If True, recalculate even if counts were already calculated.
+                Without it, the call is a no-op once ``tokens_calculated`` is set.
+        """
+        # Skip if already calculated and not forced
+        if getattr(self, "tokens_calculated", False) and not force:
+            return
+
+        # Clear existing token counts
+        self.context_block_tokens = {}
+
+        # Initialize the cache for context blocks if needed
+        if not hasattr(self, "context_blocks_cache"):
+            self.context_blocks_cache = {}
+
+        if not self.use_enhanced_context:
+            return
+
+        try:
+            # First, clear the cache to force regeneration of all blocks
+            self.context_blocks_cache = {}
+
+            # Generate all context blocks and calculate token counts
+            block_types = ("environment_info", "directory_structure", "git_status", "symbol_outline")
+
+            for block_type in block_types:
+                block_content = self._generate_context_block(block_type)
+                if block_content:
+                    self.context_block_tokens[block_type] = self.main_model.token_count(block_content)
+
+            # Mark as calculated
+            self.tokens_calculated = True
+        except Exception as err:
+            # Best effort: token counting must never break the main functionality,
+            # but report the failure instead of hiding it (tokens_calculated stays False).
+            self.io.tool_error(f"Error calculating context block tokens: {err}")
+            
+    def _generate_context_block(self, block_name):
+        """
+        Build one enhanced context block by name and store it in the cache.
+
+        Returns the generated content, or None when the name is unknown or the
+        underlying getter produced None (in which case nothing is cached).
+        """
+        getter_names = {
+            "environment_info": "get_environment_info",
+            "directory_structure": "get_directory_structure",
+            "git_status": "get_git_status",
+            "symbol_outline": "get_context_symbol_outline",
+            "context_summary": "get_context_summary",
+        }
+
+        getter_name = getter_names.get(block_name)
+        if getter_name is None:
+            return None
+
+        generated = getattr(self, getter_name)()
+        if generated is not None:
+            self.context_blocks_cache[block_name] = generated
+        return generated
+            
+    def get_cached_context_block(self, block_name):
+        """
+        Return the named context block, preferring the cached copy.
+
+        Triggers the token calculation first if it has not completed yet (that
+        also fills the cache); a block still missing is generated on demand.
+        """
+        already_counted = getattr(self, "tokens_calculated", False)
+        if not already_counted:
+            self._calculate_context_block_tokens()
+
+        # Serve from the cache when possible; fall back to generating the block
+        cache = getattr(self, "context_blocks_cache", {})
+        if block_name in cache:
+            return cache[block_name]
+
+        return self._generate_context_block(block_name)
+            
     def set_granular_editing(self, enabled):
         """
         Switch between granular editing tools and legacy search/replace.
@@ -196,13 +288,19 @@ class NavigatorCoder(Coder):
         # If enhanced context blocks are not enabled, just return the base chunks
         if not self.use_enhanced_context:
             return chunks
+        
+        # Make sure token counts are updated - using centralized method
+        # This also populates the context block cache
+        self._calculate_context_block_tokens()
             
-        # Generate all context blocks
-        env_context = self.get_environment_info()
+        # Get blocks from cache to avoid regenerating them
+        env_context = self.get_cached_context_block("environment_info")
+        dir_structure = self.get_cached_context_block("directory_structure")
+        git_status = self.get_cached_context_block("git_status")
+        symbol_outline = self.get_cached_context_block("symbol_outline")
+        
+        # Context summary needs special handling because it depends on other blocks
         context_summary = self.get_context_summary()
-        dir_structure = self.get_directory_structure()
-        git_status = self.get_git_status()
-        symbol_outline = self.get_context_symbol_outline()
         
         # 1. Add relatively static blocks BEFORE done_messages
         # These blocks change less frequently and can be part of the cacheable prefix
@@ -308,13 +406,20 @@ class NavigatorCoder(Coder):
         chunks.cur = list(self.cur_messages)
         chunks.reminder = []
 
-        # TODO review impact of token count on image messages
-        messages_tokens = self.main_model.token_count(chunks.all_messages())
+        # Use accurate token counting method that considers enhanced context blocks
+        base_messages = chunks.all_messages()
+        messages_tokens = self.main_model.token_count(base_messages)
         reminder_tokens = self.main_model.token_count(reminder_message)
         cur_tokens = self.main_model.token_count(chunks.cur)
 
         if None not in (messages_tokens, reminder_tokens, cur_tokens):
-            total_tokens = messages_tokens + reminder_tokens + cur_tokens
+            total_tokens = messages_tokens
+            # Only add tokens for reminder and cur if they're not already included
+            # in the messages_tokens calculation
+            if not chunks.reminder:
+                total_tokens += reminder_tokens
+            if not chunks.cur:
+                total_tokens += cur_tokens
         else:
             # add the reminder anyway
             total_tokens = 0
@@ -351,7 +456,16 @@ class NavigatorCoder(Coder):
         """
         if not self.use_enhanced_context:
             return None
+            
+        # If context_summary is already in the cache, return it
+        if hasattr(self, 'context_blocks_cache') and "context_summary" in self.context_blocks_cache:
+            return self.context_blocks_cache["context_summary"]
+            
         try:
+            # Make sure token counts are updated before generating the summary
+            if not hasattr(self, 'context_block_tokens') or not self.context_block_tokens:
+                self._calculate_context_block_tokens()
+            
             result = "<context name=\"context_summary\">\n"
             result += "## Current Context Overview\n\n"
             max_input_tokens = self.main_model.info.get("max_input_tokens") or 0
@@ -401,27 +515,8 @@ class NavigatorCoder(Coder):
                 else:
                     result += "No read-only files in context\n\n"
 
-            # Additional enhanced context blocks
-            env_info = self.get_environment_info()
-            dir_structure = self.get_directory_structure()
-            git_status = self.get_git_status()
-            symbol_outline = self.get_context_symbol_outline()
-
-            extra_context = ""
-            extra_tokens = 0
-            if env_info:
-                extra_context += env_info + "\n\n"
-                extra_tokens += self.main_model.token_count(env_info)
-            if dir_structure:
-                extra_context += dir_structure + "\n\n"
-                extra_tokens += self.main_model.token_count(dir_structure)
-            if git_status:
-                extra_context += git_status + "\n\n"
-                extra_tokens += self.main_model.token_count(git_status)
-            if symbol_outline:
-                extra_context += symbol_outline + "\n\n"
-                extra_tokens += self.main_model.token_count(symbol_outline)
-
+            # Use the pre-calculated context block tokens
+            extra_tokens = sum(self.context_block_tokens.values())
             total_tokens = total_file_tokens + extra_tokens
 
             result += f"**Total files usage: {total_file_tokens:,} tokens**\n\n"
@@ -435,6 +530,12 @@ class NavigatorCoder(Coder):
                     result += "- `[tool_call(Remove, file_path=\"path/to/large_file.ext\")]`\n"
                     result += "- Keep only essential files in context for best performance"
             result += "\n</context>"
+            
+            # Cache the result
+            if not hasattr(self, 'context_blocks_cache'):
+                self.context_blocks_cache = {}
+            self.context_blocks_cache["context_summary"] = result
+                
             return result
         except Exception as e:
             self.io.tool_error(f"Error generating context summary: {str(e)}")
@@ -1647,8 +1748,15 @@ Just reply with fixed versions of the {blocks} above that failed to match.
         
         if self.use_enhanced_context:
             self.io.tool_output("Enhanced context blocks are now ON - directory structure and git status will be included.")
+            # Mark tokens as needing calculation, but don't calculate yet (lazy calculation)
+            self.tokens_calculated = False
+            self.context_blocks_cache = {}
         else:
             self.io.tool_output("Enhanced context blocks are now OFF - directory structure and git status will not be included.")
+            # Clear token counts and cache when disabled
+            self.context_block_tokens = {}
+            self.context_blocks_cache = {}
+            self.tokens_calculated = False
         
         return True
         
diff --git a/aider/commands.py b/aider/commands.py
index 7bba50a68..1e2ba8879 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -464,6 +464,20 @@ class Commands:
                 tokens = self.coder.main_model.token_count(repo_content)
                 res.append((tokens, "repository map", "use --map-tokens to resize"))
 
+        # Enhanced context blocks (only for navigator mode)
+        if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            # Force token calculation if it hasn't been done yet
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                if not hasattr(self.coder, 'tokens_calculated') or not self.coder.tokens_calculated:
+                    self.coder._calculate_context_block_tokens()
+                    
+            # Add enhanced context blocks to the display
+            if hasattr(self.coder, 'context_block_tokens') and self.coder.context_block_tokens:
+                for block_name, tokens in self.coder.context_block_tokens.items():
+                    # Format the block name more nicely
+                    display_name = block_name.replace('_', ' ').title()
+                    res.append((tokens, f"{display_name} context block", "/context-blocks to toggle"))
+
         fence = "`" * 3
 
         file_res = []
@@ -872,6 +886,11 @@ class Commands:
                     fname = self.coder.get_rel_fname(abs_file_path)
                     self.io.tool_output(f"Added {fname} to the chat")
                     self.coder.check_added_files()
+                    
+                    # Recalculate context block tokens if using navigator mode
+                    if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+                        if hasattr(self.coder, '_calculate_context_block_tokens'):
+                            self.coder._calculate_context_block_tokens()
 
     def completions_drop(self):
         files = self.coder.get_inchat_relative_files()
@@ -891,9 +910,16 @@ class Commands:
             else:
                 self.io.tool_output("Dropping all files from the chat session.")
             self._drop_all_files()
+            
+            # Recalculate context block tokens after dropping all files
+            if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+                if hasattr(self.coder, '_calculate_context_block_tokens'):
+                    self.coder._calculate_context_block_tokens()
             return
 
         filenames = parse_quoted_filenames(args)
+        files_changed = False
+        
         for word in filenames:
             # Expand tilde in the path
             expanded_word = os.path.expanduser(word)
@@ -916,6 +942,7 @@ class Commands:
             for matched_file in read_only_matched:
                 self.coder.abs_read_only_fnames.remove(matched_file)
                 self.io.tool_output(f"Removed read-only file {matched_file} from the chat")
+                files_changed = True
 
             # For editable files, use glob if word contains glob chars, otherwise use substring
             if any(c in expanded_word for c in "*?[]"):
@@ -934,6 +961,12 @@ class Commands:
                 if abs_fname in self.coder.abs_fnames:
                     self.coder.abs_fnames.remove(abs_fname)
                     self.io.tool_output(f"Removed {matched_file} from the chat")
+                    files_changed = True
+                    
+        # Recalculate context block tokens if any files were changed and using navigator mode
+        if files_changed and hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                self.coder._calculate_context_block_tokens()
 
     def cmd_git(self, args):
         "Run a git command (output excluded from chat)"