Bring accurate enhanced context token counts to /tokens

Amar Sood (tekacs) 2025-04-14 08:34:21 -04:00
parent b155143845
commit b8f7757435
2 changed files with 170 additions and 29 deletions

@@ -93,8 +93,100 @@ class NavigatorCoder(Coder):
         # Enable enhanced context blocks by default
         self.use_enhanced_context = True
 
+        # Initialize empty token tracking dictionary and cache structures
+        # but don't populate yet to avoid startup delay
+        self.context_block_tokens = {}
+        self.context_blocks_cache = {}
+        self.tokens_calculated = False
+
         super().__init__(*args, **kwargs)
 
+    def _calculate_context_block_tokens(self, force=False):
+        """
+        Calculate token counts for all enhanced context blocks.
+
+        This is the central method for calculating token counts,
+        ensuring they're consistent across all parts of the code.
+        This method populates the cache for context blocks and calculates tokens.
+
+        Args:
+            force: If True, recalculate tokens even if already calculated
+        """
+        # Skip if already calculated and not forced
+        if hasattr(self, 'tokens_calculated') and self.tokens_calculated and not force:
+            return
+
+        # Clear existing token counts
+        self.context_block_tokens = {}
+
+        # Initialize the cache for context blocks if needed
+        if not hasattr(self, 'context_blocks_cache'):
+            self.context_blocks_cache = {}
+
+        if not self.use_enhanced_context:
+            return
+
+        try:
+            # First, clear the cache to force regeneration of all blocks
+            self.context_blocks_cache = {}
+
+            # Generate all context blocks and calculate token counts
+            block_types = ["environment_info", "directory_structure", "git_status", "symbol_outline"]
+            for block_type in block_types:
+                block_content = self._generate_context_block(block_type)
+                if block_content:
+                    self.context_block_tokens[block_type] = self.main_model.token_count(block_content)
+
+            # Mark as calculated
+            self.tokens_calculated = True
+        except Exception as e:
+            # Silently handle errors during calculation
+            # This prevents errors in token counting from breaking the main functionality
+            pass
+
+    def _generate_context_block(self, block_name):
+        """
+        Generate a specific context block and cache it.
+        This is a helper method for get_cached_context_block.
+        """
+        content = None
+
+        if block_name == "environment_info":
+            content = self.get_environment_info()
+        elif block_name == "directory_structure":
+            content = self.get_directory_structure()
+        elif block_name == "git_status":
+            content = self.get_git_status()
+        elif block_name == "symbol_outline":
+            content = self.get_context_symbol_outline()
+        elif block_name == "context_summary":
+            content = self.get_context_summary()
+
+        # Cache the result if it's not None
+        if content is not None:
+            self.context_blocks_cache[block_name] = content
+
+        return content
+
+    def get_cached_context_block(self, block_name):
+        """
+        Get a context block from the cache, or generate it if not available.
+        This should be used by format_chat_chunks to avoid regenerating blocks.
+        This will ensure tokens are calculated if they haven't been yet.
+        """
+        # Make sure tokens have been calculated at least once
+        if not hasattr(self, 'tokens_calculated') or not self.tokens_calculated:
+            self._calculate_context_block_tokens()
+
+        # Return from cache if available
+        if hasattr(self, 'context_blocks_cache') and block_name in self.context_blocks_cache:
+            return self.context_blocks_cache[block_name]
+
+        # Otherwise generate and cache the block
+        return self._generate_context_block(block_name)
+
     def set_granular_editing(self, enabled):
         """
         Switch between granular editing tools and legacy search/replace.
@@ -197,12 +289,18 @@ class NavigatorCoder(Coder):
         if not self.use_enhanced_context:
             return chunks
 
-        # Generate all context blocks
-        env_context = self.get_environment_info()
+        # Make sure token counts are updated - using centralized method
+        # This also populates the context block cache
+        self._calculate_context_block_tokens()
+
+        # Get blocks from cache to avoid regenerating them
+        env_context = self.get_cached_context_block("environment_info")
+        dir_structure = self.get_cached_context_block("directory_structure")
+        git_status = self.get_cached_context_block("git_status")
+        symbol_outline = self.get_cached_context_block("symbol_outline")
+
+        # Context summary needs special handling because it depends on other blocks
         context_summary = self.get_context_summary()
-        dir_structure = self.get_directory_structure()
-        git_status = self.get_git_status()
-        symbol_outline = self.get_context_symbol_outline()
 
         # 1. Add relatively static blocks BEFORE done_messages
         # These blocks change less frequently and can be part of the cacheable prefix
@@ -308,13 +406,20 @@ class NavigatorCoder(Coder):
         chunks.cur = list(self.cur_messages)
         chunks.reminder = []
 
-        # TODO review impact of token count on image messages
-        messages_tokens = self.main_model.token_count(chunks.all_messages())
+        # Use accurate token counting method that considers enhanced context blocks
+        base_messages = chunks.all_messages()
+        messages_tokens = self.main_model.token_count(base_messages)
         reminder_tokens = self.main_model.token_count(reminder_message)
         cur_tokens = self.main_model.token_count(chunks.cur)
 
         if None not in (messages_tokens, reminder_tokens, cur_tokens):
-            total_tokens = messages_tokens + reminder_tokens + cur_tokens
+            total_tokens = messages_tokens
+            # Only add tokens for reminder and cur if they're not already included
+            # in the messages_tokens calculation
+            if not chunks.reminder:
+                total_tokens += reminder_tokens
+            if not chunks.cur:
+                total_tokens += cur_tokens
         else:
             # add the reminder anyway
             total_tokens = 0
@@ -351,7 +456,16 @@ class NavigatorCoder(Coder):
         """
         if not self.use_enhanced_context:
             return None
+
+        # If context_summary is already in the cache, return it
+        if hasattr(self, 'context_blocks_cache') and "context_summary" in self.context_blocks_cache:
+            return self.context_blocks_cache["context_summary"]
+
         try:
+            # Make sure token counts are updated before generating the summary
+            if not hasattr(self, 'context_block_tokens') or not self.context_block_tokens:
+                self._calculate_context_block_tokens()
+
             result = "<context name=\"context_summary\">\n"
             result += "## Current Context Overview\n\n"
             max_input_tokens = self.main_model.info.get("max_input_tokens") or 0
@@ -401,27 +515,8 @@ class NavigatorCoder(Coder):
             else:
                 result += "No read-only files in context\n\n"
 
-            # Additional enhanced context blocks
-            env_info = self.get_environment_info()
-            dir_structure = self.get_directory_structure()
-            git_status = self.get_git_status()
-            symbol_outline = self.get_context_symbol_outline()
-
-            extra_context = ""
-            extra_tokens = 0
-            if env_info:
-                extra_context += env_info + "\n\n"
-                extra_tokens += self.main_model.token_count(env_info)
-            if dir_structure:
-                extra_context += dir_structure + "\n\n"
-                extra_tokens += self.main_model.token_count(dir_structure)
-            if git_status:
-                extra_context += git_status + "\n\n"
-                extra_tokens += self.main_model.token_count(git_status)
-            if symbol_outline:
-                extra_context += symbol_outline + "\n\n"
-                extra_tokens += self.main_model.token_count(symbol_outline)
+            # Use the pre-calculated context block tokens
+            extra_tokens = sum(self.context_block_tokens.values())
 
             total_tokens = total_file_tokens + extra_tokens
 
             result += f"**Total files usage: {total_file_tokens:,} tokens**\n\n"
@@ -435,6 +530,12 @@ class NavigatorCoder(Coder):
             result += "- `[tool_call(Remove, file_path=\"path/to/large_file.ext\")]`\n"
             result += "- Keep only essential files in context for best performance"
             result += "\n</context>"
+
+            # Cache the result
+            if not hasattr(self, 'context_blocks_cache'):
+                self.context_blocks_cache = {}
+            self.context_blocks_cache["context_summary"] = result
+
             return result
         except Exception as e:
             self.io.tool_error(f"Error generating context summary: {str(e)}")
@@ -1647,8 +1748,15 @@ Just reply with fixed versions of the {blocks} above that failed to match.
         if self.use_enhanced_context:
             self.io.tool_output("Enhanced context blocks are now ON - directory structure and git status will be included.")
+
+            # Mark tokens as needing calculation, but don't calculate yet (lazy calculation)
+            self.tokens_calculated = False
+            self.context_blocks_cache = {}
         else:
             self.io.tool_output("Enhanced context blocks are now OFF - directory structure and git status will not be included.")
+
+            # Clear token counts and cache when disabled
+            self.context_block_tokens = {}
+            self.context_blocks_cache = {}
+            self.tokens_calculated = False
+
         return True
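Taken together, the NavigatorCoder changes amount to a compute-once, invalidate-on-change cache keyed by block name: token counts are only recomputed when files change or the feature is toggled, and format_chat_chunks reads blocks back from the cache instead of regenerating them. The following is a minimal standalone sketch of that pattern; the class name, the generator callables, and the token_count callback are illustrative stand-ins, not aider's actual API.

# Sketch of the lazy block-token cache pattern used above (illustrative names).
class BlockTokenCache:
    def __init__(self, generators, token_count):
        self.generators = generators    # {block_name: zero-arg callable returning str or None}
        self.token_count = token_count  # callable: str -> int
        self.blocks = {}                # cached block text
        self.tokens = {}                # cached per-block token counts
        self.calculated = False

    def invalidate(self):
        # Called when files are added/dropped or the feature is toggled off.
        self.blocks = {}
        self.tokens = {}
        self.calculated = False

    def ensure_calculated(self, force=False):
        # Compute all blocks and their token counts once; skip if already done.
        if self.calculated and not force:
            return
        self.blocks = {}
        self.tokens = {}
        for name, generate in self.generators.items():
            content = generate()
            if content:
                self.blocks[name] = content
                self.tokens[name] = self.token_count(content)
        self.calculated = True

    def get(self, name):
        # Return a cached block, generating it on demand if necessary.
        self.ensure_calculated()
        if name in self.blocks:
            return self.blocks[name]
        content = self.generators[name]()
        if content is not None:
            self.blocks[name] = content
        return content

# Example usage with stand-in generators and a toy token counter:
cache = BlockTokenCache(
    {"git_status": lambda: "## Git Status\nclean", "environment_info": lambda: None},
    token_count=lambda text: len(text.split()),
)
print(cache.get("git_status"), cache.tokens)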

@@ -464,6 +464,20 @@ class Commands:
                 tokens = self.coder.main_model.token_count(repo_content)
                 res.append((tokens, "repository map", "use --map-tokens to resize"))
 
+        # Enhanced context blocks (only for navigator mode)
+        if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            # Force token calculation if it hasn't been done yet
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                if not hasattr(self.coder, 'tokens_calculated') or not self.coder.tokens_calculated:
+                    self.coder._calculate_context_block_tokens()
+
+            # Add enhanced context blocks to the display
+            if hasattr(self.coder, 'context_block_tokens') and self.coder.context_block_tokens:
+                for block_name, tokens in self.coder.context_block_tokens.items():
+                    # Format the block name more nicely
+                    display_name = block_name.replace('_', ' ').title()
+                    res.append((tokens, f"{display_name} context block", "/context-blocks to toggle"))
+
         fence = "`" * 3
         file_res = []
@@ -873,6 +887,11 @@ class Commands:
                 self.io.tool_output(f"Added {fname} to the chat")
         self.coder.check_added_files()
 
+        # Recalculate context block tokens if using navigator mode
+        if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                self.coder._calculate_context_block_tokens()
+
     def completions_drop(self):
        files = self.coder.get_inchat_relative_files()
        read_only_files = [self.coder.get_rel_fname(fn) for fn in self.coder.abs_read_only_fnames]
@@ -891,9 +910,16 @@ class Commands:
         else:
             self.io.tool_output("Dropping all files from the chat session.")
             self._drop_all_files()
+
+            # Recalculate context block tokens after dropping all files
+            if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+                if hasattr(self.coder, '_calculate_context_block_tokens'):
+                    self.coder._calculate_context_block_tokens()
             return
 
         filenames = parse_quoted_filenames(args)
+        files_changed = False
+
         for word in filenames:
             # Expand tilde in the path
             expanded_word = os.path.expanduser(word)
@@ -916,6 +942,7 @@ class Commands:
                 for matched_file in read_only_matched:
                     self.coder.abs_read_only_fnames.remove(matched_file)
                     self.io.tool_output(f"Removed read-only file {matched_file} from the chat")
+                    files_changed = True
 
             # For editable files, use glob if word contains glob chars, otherwise use substring
             if any(c in expanded_word for c in "*?[]"):
@@ -934,6 +961,12 @@ class Commands:
                 if abs_fname in self.coder.abs_fnames:
                     self.coder.abs_fnames.remove(abs_fname)
                     self.io.tool_output(f"Removed {matched_file} from the chat")
+                    files_changed = True
+
+        # Recalculate context block tokens if any files were changed and using navigator mode
+        if files_changed and hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                self.coder._calculate_context_block_tokens()
 
     def cmd_git(self, args):
         "Run a git command (output excluded from chat)"