Bring accurate enhanced context token counts to /tokens

Amar Sood (tekacs) 2025-04-14 08:34:21 -04:00
parent b155143845
commit b8f7757435
2 changed files with 170 additions and 29 deletions

@@ -93,8 +93,100 @@ class NavigatorCoder(Coder):
         # Enable enhanced context blocks by default
         self.use_enhanced_context = True
 
+        # Initialize empty token tracking dictionary and cache structures
+        # but don't populate yet to avoid startup delay
+        self.context_block_tokens = {}
+        self.context_blocks_cache = {}
+        self.tokens_calculated = False
+
         super().__init__(*args, **kwargs)
 
+    def _calculate_context_block_tokens(self, force=False):
+        """
+        Calculate token counts for all enhanced context blocks.
+
+        This is the central method for calculating token counts,
+        ensuring they're consistent across all parts of the code.
+        This method populates the cache for context blocks and calculates tokens.
+
+        Args:
+            force: If True, recalculate tokens even if already calculated
+        """
+        # Skip if already calculated and not forced
+        if hasattr(self, 'tokens_calculated') and self.tokens_calculated and not force:
+            return
+
+        # Clear existing token counts
+        self.context_block_tokens = {}
+
+        # Initialize the cache for context blocks if needed
+        if not hasattr(self, 'context_blocks_cache'):
+            self.context_blocks_cache = {}
+
+        if not self.use_enhanced_context:
+            return
+
+        try:
+            # First, clear the cache to force regeneration of all blocks
+            self.context_blocks_cache = {}
+
+            # Generate all context blocks and calculate token counts
+            block_types = ["environment_info", "directory_structure", "git_status", "symbol_outline"]
+            for block_type in block_types:
+                block_content = self._generate_context_block(block_type)
+                if block_content:
+                    self.context_block_tokens[block_type] = self.main_model.token_count(block_content)
+
+            # Mark as calculated
+            self.tokens_calculated = True
+        except Exception as e:
+            # Silently handle errors during calculation
+            # This prevents errors in token counting from breaking the main functionality
+            pass
+
+    def _generate_context_block(self, block_name):
+        """
+        Generate a specific context block and cache it.
+        This is a helper method for get_cached_context_block.
+        """
+        content = None
+
+        if block_name == "environment_info":
+            content = self.get_environment_info()
+        elif block_name == "directory_structure":
+            content = self.get_directory_structure()
+        elif block_name == "git_status":
+            content = self.get_git_status()
+        elif block_name == "symbol_outline":
+            content = self.get_context_symbol_outline()
+        elif block_name == "context_summary":
+            content = self.get_context_summary()
+
+        # Cache the result if it's not None
+        if content is not None:
+            self.context_blocks_cache[block_name] = content
+
+        return content
+
+    def get_cached_context_block(self, block_name):
+        """
+        Get a context block from the cache, or generate it if not available.
+        This should be used by format_chat_chunks to avoid regenerating blocks.
+        This will ensure tokens are calculated if they haven't been yet.
+        """
+        # Make sure tokens have been calculated at least once
+        if not hasattr(self, 'tokens_calculated') or not self.tokens_calculated:
+            self._calculate_context_block_tokens()
+
+        # Return from cache if available
+        if hasattr(self, 'context_blocks_cache') and block_name in self.context_blocks_cache:
+            return self.context_blocks_cache[block_name]
+
+        # Otherwise generate and cache the block
+        return self._generate_context_block(block_name)
+
     def set_granular_editing(self, enabled):
         """
         Switch between granular editing tools and legacy search/replace.
@@ -197,12 +289,18 @@ class NavigatorCoder(Coder):
         if not self.use_enhanced_context:
             return chunks
 
-        # Generate all context blocks
-        env_context = self.get_environment_info()
+        # Make sure token counts are updated - using centralized method
+        # This also populates the context block cache
+        self._calculate_context_block_tokens()
+
+        # Get blocks from cache to avoid regenerating them
+        env_context = self.get_cached_context_block("environment_info")
+        dir_structure = self.get_cached_context_block("directory_structure")
+        git_status = self.get_cached_context_block("git_status")
+        symbol_outline = self.get_cached_context_block("symbol_outline")
+
+        # Context summary needs special handling because it depends on other blocks
         context_summary = self.get_context_summary()
-        dir_structure = self.get_directory_structure()
-        git_status = self.get_git_status()
-        symbol_outline = self.get_context_symbol_outline()
 
         # 1. Add relatively static blocks BEFORE done_messages
         # These blocks change less frequently and can be part of the cacheable prefix
@@ -308,13 +406,20 @@ class NavigatorCoder(Coder):
         chunks.cur = list(self.cur_messages)
         chunks.reminder = []
 
-        # TODO review impact of token count on image messages
-        messages_tokens = self.main_model.token_count(chunks.all_messages())
+        # Use accurate token counting method that considers enhanced context blocks
+        base_messages = chunks.all_messages()
+        messages_tokens = self.main_model.token_count(base_messages)
         reminder_tokens = self.main_model.token_count(reminder_message)
         cur_tokens = self.main_model.token_count(chunks.cur)
 
         if None not in (messages_tokens, reminder_tokens, cur_tokens):
-            total_tokens = messages_tokens + reminder_tokens + cur_tokens
+            total_tokens = messages_tokens
+            # Only add tokens for reminder and cur if they're not already included
+            # in the messages_tokens calculation
+            if not chunks.reminder:
+                total_tokens += reminder_tokens
+            if not chunks.cur:
+                total_tokens += cur_tokens
         else:
             # add the reminder anyway
             total_tokens = 0
@@ -351,7 +456,16 @@ class NavigatorCoder(Coder):
         """
         if not self.use_enhanced_context:
             return None
+
+        # If context_summary is already in the cache, return it
+        if hasattr(self, 'context_blocks_cache') and "context_summary" in self.context_blocks_cache:
+            return self.context_blocks_cache["context_summary"]
+
         try:
+            # Make sure token counts are updated before generating the summary
+            if not hasattr(self, 'context_block_tokens') or not self.context_block_tokens:
+                self._calculate_context_block_tokens()
+
             result = "<context name=\"context_summary\">\n"
             result += "## Current Context Overview\n\n"
             max_input_tokens = self.main_model.info.get("max_input_tokens") or 0
@@ -401,27 +515,8 @@ class NavigatorCoder(Coder):
             else:
                 result += "No read-only files in context\n\n"
 
-            # Additional enhanced context blocks
-            env_info = self.get_environment_info()
-            dir_structure = self.get_directory_structure()
-            git_status = self.get_git_status()
-            symbol_outline = self.get_context_symbol_outline()
-
-            extra_context = ""
-            extra_tokens = 0
-            if env_info:
-                extra_context += env_info + "\n\n"
-                extra_tokens += self.main_model.token_count(env_info)
-            if dir_structure:
-                extra_context += dir_structure + "\n\n"
-                extra_tokens += self.main_model.token_count(dir_structure)
-            if git_status:
-                extra_context += git_status + "\n\n"
-                extra_tokens += self.main_model.token_count(git_status)
-            if symbol_outline:
-                extra_context += symbol_outline + "\n\n"
-                extra_tokens += self.main_model.token_count(symbol_outline)
+            # Use the pre-calculated context block tokens
+            extra_tokens = sum(self.context_block_tokens.values())
 
             total_tokens = total_file_tokens + extra_tokens
 
             result += f"**Total files usage: {total_file_tokens:,} tokens**\n\n"
@@ -435,6 +530,12 @@ class NavigatorCoder(Coder):
             result += "- `[tool_call(Remove, file_path=\"path/to/large_file.ext\")]`\n"
             result += "- Keep only essential files in context for best performance"
             result += "\n</context>"
+
+            # Cache the result
+            if not hasattr(self, 'context_blocks_cache'):
+                self.context_blocks_cache = {}
+            self.context_blocks_cache["context_summary"] = result
+
             return result
         except Exception as e:
             self.io.tool_error(f"Error generating context summary: {str(e)}")
@@ -1647,8 +1748,15 @@ Just reply with fixed versions of the {blocks} above that failed to match.
         if self.use_enhanced_context:
             self.io.tool_output("Enhanced context blocks are now ON - directory structure and git status will be included.")
+
+            # Mark tokens as needing calculation, but don't calculate yet (lazy calculation)
+            self.tokens_calculated = False
+            self.context_blocks_cache = {}
         else:
             self.io.tool_output("Enhanced context blocks are now OFF - directory structure and git status will not be included.")
+
+            # Clear token counts and cache when disabled
+            self.context_block_tokens = {}
+            self.context_blocks_cache = {}
+            self.tokens_calculated = False
+
         return True
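Taken together, the NavigatorCoder changes amount to a compute-once, invalidate-on-change cache keyed by block name: token counts are only recomputed when files change or the feature is toggled, and format_chat_chunks reads blocks back from the cache instead of regenerating them. The following is a minimal standalone sketch of that pattern; the class name, the generator callables, and the token_count callback are illustrative stand-ins, not aider's actual API.

# Sketch of the lazy block-token cache pattern used above (illustrative names).
class BlockTokenCache:
    def __init__(self, generators, token_count):
        self.generators = generators    # {block_name: zero-arg callable returning str or None}
        self.token_count = token_count  # callable: str -> int
        self.blocks = {}                # cached block text
        self.tokens = {}                # cached per-block token counts
        self.calculated = False

    def invalidate(self):
        # Called when files are added/dropped or the feature is toggled off.
        self.blocks = {}
        self.tokens = {}
        self.calculated = False

    def ensure_calculated(self, force=False):
        # Compute all blocks and their token counts once; skip if already done.
        if self.calculated and not force:
            return
        self.blocks = {}
        self.tokens = {}
        for name, generate in self.generators.items():
            content = generate()
            if content:
                self.blocks[name] = content
                self.tokens[name] = self.token_count(content)
        self.calculated = True

    def get(self, name):
        # Return a cached block, generating it on demand if necessary.
        self.ensure_calculated()
        if name in self.blocks:
            return self.blocks[name]
        content = self.generators[name]()
        if content is not None:
            self.blocks[name] = content
        return content

# Example usage with stand-in generators and a toy token counter:
cache = BlockTokenCache(
    {"git_status": lambda: "## Git Status\nclean", "environment_info": lambda: None},
    token_count=lambda text: len(text.split()),
)
print(cache.get("git_status"), cache.tokens)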

@@ -464,6 +464,20 @@ class Commands:
                 tokens = self.coder.main_model.token_count(repo_content)
                 res.append((tokens, "repository map", "use --map-tokens to resize"))
 
+        # Enhanced context blocks (only for navigator mode)
+        if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            # Force token calculation if it hasn't been done yet
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                if not hasattr(self.coder, 'tokens_calculated') or not self.coder.tokens_calculated:
+                    self.coder._calculate_context_block_tokens()
+
+            # Add enhanced context blocks to the display
+            if hasattr(self.coder, 'context_block_tokens') and self.coder.context_block_tokens:
+                for block_name, tokens in self.coder.context_block_tokens.items():
+                    # Format the block name more nicely
+                    display_name = block_name.replace('_', ' ').title()
+                    res.append((tokens, f"{display_name} context block", "/context-blocks to toggle"))
+
         fence = "`" * 3
         file_res = []
@@ -873,6 +887,11 @@ class Commands:
                 self.io.tool_output(f"Added {fname} to the chat")
         self.coder.check_added_files()
 
+        # Recalculate context block tokens if using navigator mode
+        if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                self.coder._calculate_context_block_tokens()
+
     def completions_drop(self):
        files = self.coder.get_inchat_relative_files()
        read_only_files = [self.coder.get_rel_fname(fn) for fn in self.coder.abs_read_only_fnames]
@@ -891,9 +910,16 @@ class Commands:
         else:
             self.io.tool_output("Dropping all files from the chat session.")
             self._drop_all_files()
+
+            # Recalculate context block tokens after dropping all files
+            if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+                if hasattr(self.coder, '_calculate_context_block_tokens'):
+                    self.coder._calculate_context_block_tokens()
             return
 
         filenames = parse_quoted_filenames(args)
+        files_changed = False
+
         for word in filenames:
             # Expand tilde in the path
             expanded_word = os.path.expanduser(word)
@@ -916,6 +942,7 @@ class Commands:
                 for matched_file in read_only_matched:
                     self.coder.abs_read_only_fnames.remove(matched_file)
                     self.io.tool_output(f"Removed read-only file {matched_file} from the chat")
+                    files_changed = True
 
             # For editable files, use glob if word contains glob chars, otherwise use substring
             if any(c in expanded_word for c in "*?[]"):
@@ -934,6 +961,12 @@ class Commands:
                 if abs_fname in self.coder.abs_fnames:
                     self.coder.abs_fnames.remove(abs_fname)
                     self.io.tool_output(f"Removed {matched_file} from the chat")
+                    files_changed = True
+
+        # Recalculate context block tokens if any files were changed and using navigator mode
+        if files_changed and hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
+            if hasattr(self.coder, '_calculate_context_block_tokens'):
+                self.coder._calculate_context_block_tokens()
 
     def cmd_git(self, args):
         "Run a git command (output excluded from chat)"