mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-06 04:35:00 +00:00
Bring accurate enhanced context token counts to /tokens
This commit is contained in:
parent
b155143845
commit
b8f7757435
2 changed files with 170 additions and 29 deletions
|
@ -93,8 +93,100 @@ class NavigatorCoder(Coder):
|
||||||
# Enable enhanced context blocks by default
|
# Enable enhanced context blocks by default
|
||||||
self.use_enhanced_context = True
|
self.use_enhanced_context = True
|
||||||
|
|
||||||
|
# Initialize empty token tracking dictionary and cache structures
|
||||||
|
# but don't populate yet to avoid startup delay
|
||||||
|
self.context_block_tokens = {}
|
||||||
|
self.context_blocks_cache = {}
|
||||||
|
self.tokens_calculated = False
|
||||||
|
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def _calculate_context_block_tokens(self, force=False):
    """
    Calculate token counts for all enhanced context blocks.

    This is the central method for calculating token counts, so every
    consumer reads the same numbers from ``self.context_block_tokens``.

    Each block is produced through ``self._generate_context_block`` and the
    non-empty ones are measured with ``self.main_model.token_count``. The
    block cache (``self.context_blocks_cache``) is reset first so that every
    block is regenerated.

    Once a calculation has succeeded, calling this method again without
    ``force=True`` is a no-op; pass ``force=True`` to refresh the counts.

    The method is best-effort and never raises: if generating or counting a
    block fails, the error is reported through ``self.io.tool_error`` (when
    an ``io`` object is available), the counts gathered so far are kept, and
    ``tokens_calculated`` is left unset so a later call retries.

    Args:
        force: If True, recalculate tokens even if already calculated
    """
    # Skip if already calculated and not forced
    if getattr(self, "tokens_calculated", False) and not force:
        return

    # Clear existing token counts so stale numbers never survive a recalculation
    self.context_block_tokens = {}

    if not self.use_enhanced_context:
        # Nothing to generate, but make sure the cache attribute exists
        if not hasattr(self, "context_blocks_cache"):
            self.context_blocks_cache = {}
        return

    # Clear the cache to force regeneration of all blocks
    self.context_blocks_cache = {}

    block_types = (
        "environment_info",
        "directory_structure",
        "git_status",
        "symbol_outline",
    )

    try:
        for block_type in block_types:
            block_content = self._generate_context_block(block_type)
            # Empty or missing blocks contribute no tokens and get no entry
            if block_content:
                self.context_block_tokens[block_type] = self.main_model.token_count(
                    block_content
                )
    except Exception as err:
        # Token counting must never break the main functionality, so the
        # error is reported instead of propagated.
        io = getattr(self, "io", None)
        if io is not None:
            io.tool_error(f"Error calculating context block tokens: {err}")
        return

    # Mark as calculated only after every block was processed successfully
    self.tokens_calculated = True
|
||||||
|
|
||||||
|
def _generate_context_block(self, block_name):
    """
    Generate a specific context block and cache it.

    This is a helper method for get_cached_context_block.

    Args:
        block_name: Name of the block to build. Recognised names are
            "environment_info", "directory_structure", "git_status",
            "symbol_outline" and "context_summary".

    Returns:
        Whatever the matching generator method returned, or None when
        ``block_name`` is not recognised. Any result other than None is also
        stored in ``self.context_blocks_cache`` under ``block_name``.
    """
    # Block name -> name of the method on self that produces its content
    generators = {
        "environment_info": "get_environment_info",
        "directory_structure": "get_directory_structure",
        "git_status": "get_git_status",
        "symbol_outline": "get_context_symbol_outline",
        "context_summary": "get_context_summary",
    }

    generator_name = generators.get(block_name)
    if generator_name is None:
        return None

    content = getattr(self, generator_name)()

    # Cache the result if it's not None
    if content is not None:
        # Create the cache on demand so this helper also works before any
        # other code has initialised it.
        if not hasattr(self, "context_blocks_cache"):
            self.context_blocks_cache = {}
        self.context_blocks_cache[block_name] = content

    return content
|
||||||
|
|
||||||
|
def get_cached_context_block(self, block_name):
    """
    Return the named context block, preferring the cached copy.

    Intended for format_chat_chunks so that blocks are not rebuilt on every
    call. If token counts have not been calculated yet, the calculation is
    triggered first; the block is then served from
    ``self.context_blocks_cache`` when present and generated (and cached)
    through ``_generate_context_block`` otherwise.
    """
    # Trigger the token calculation when it has not completed yet
    already_counted = getattr(self, "tokens_calculated", False)
    if not already_counted:
        self._calculate_context_block_tokens()

    # A missing cache attribute behaves like an empty cache
    cached_blocks = getattr(self, "context_blocks_cache", ())
    if block_name in cached_blocks:
        return cached_blocks[block_name]

    # Cache miss: build the block now (the helper stores it for next time)
    return self._generate_context_block(block_name)
|
||||||
|
|
||||||
def set_granular_editing(self, enabled):
|
def set_granular_editing(self, enabled):
|
||||||
"""
|
"""
|
||||||
Switch between granular editing tools and legacy search/replace.
|
Switch between granular editing tools and legacy search/replace.
|
||||||
|
@ -197,12 +289,18 @@ class NavigatorCoder(Coder):
|
||||||
if not self.use_enhanced_context:
|
if not self.use_enhanced_context:
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
# Generate all context blocks
|
# Make sure token counts are updated - using centralized method
|
||||||
env_context = self.get_environment_info()
|
# This also populates the context block cache
|
||||||
|
self._calculate_context_block_tokens()
|
||||||
|
|
||||||
|
# Get blocks from cache to avoid regenerating them
|
||||||
|
env_context = self.get_cached_context_block("environment_info")
|
||||||
|
dir_structure = self.get_cached_context_block("directory_structure")
|
||||||
|
git_status = self.get_cached_context_block("git_status")
|
||||||
|
symbol_outline = self.get_cached_context_block("symbol_outline")
|
||||||
|
|
||||||
|
# Context summary needs special handling because it depends on other blocks
|
||||||
context_summary = self.get_context_summary()
|
context_summary = self.get_context_summary()
|
||||||
dir_structure = self.get_directory_structure()
|
|
||||||
git_status = self.get_git_status()
|
|
||||||
symbol_outline = self.get_context_symbol_outline()
|
|
||||||
|
|
||||||
# 1. Add relatively static blocks BEFORE done_messages
|
# 1. Add relatively static blocks BEFORE done_messages
|
||||||
# These blocks change less frequently and can be part of the cacheable prefix
|
# These blocks change less frequently and can be part of the cacheable prefix
|
||||||
|
@ -308,13 +406,20 @@ class NavigatorCoder(Coder):
|
||||||
chunks.cur = list(self.cur_messages)
|
chunks.cur = list(self.cur_messages)
|
||||||
chunks.reminder = []
|
chunks.reminder = []
|
||||||
|
|
||||||
# TODO review impact of token count on image messages
|
# Use accurate token counting method that considers enhanced context blocks
|
||||||
messages_tokens = self.main_model.token_count(chunks.all_messages())
|
base_messages = chunks.all_messages()
|
||||||
|
messages_tokens = self.main_model.token_count(base_messages)
|
||||||
reminder_tokens = self.main_model.token_count(reminder_message)
|
reminder_tokens = self.main_model.token_count(reminder_message)
|
||||||
cur_tokens = self.main_model.token_count(chunks.cur)
|
cur_tokens = self.main_model.token_count(chunks.cur)
|
||||||
|
|
||||||
if None not in (messages_tokens, reminder_tokens, cur_tokens):
|
if None not in (messages_tokens, reminder_tokens, cur_tokens):
|
||||||
total_tokens = messages_tokens + reminder_tokens + cur_tokens
|
total_tokens = messages_tokens
|
||||||
|
# Only add tokens for reminder and cur if they're not already included
|
||||||
|
# in the messages_tokens calculation
|
||||||
|
if not chunks.reminder:
|
||||||
|
total_tokens += reminder_tokens
|
||||||
|
if not chunks.cur:
|
||||||
|
total_tokens += cur_tokens
|
||||||
else:
|
else:
|
||||||
# add the reminder anyway
|
# add the reminder anyway
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
|
@ -351,7 +456,16 @@ class NavigatorCoder(Coder):
|
||||||
"""
|
"""
|
||||||
if not self.use_enhanced_context:
|
if not self.use_enhanced_context:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# If context_summary is already in the cache, return it
|
||||||
|
if hasattr(self, 'context_blocks_cache') and "context_summary" in self.context_blocks_cache:
|
||||||
|
return self.context_blocks_cache["context_summary"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Make sure token counts are updated before generating the summary
|
||||||
|
if not hasattr(self, 'context_block_tokens') or not self.context_block_tokens:
|
||||||
|
self._calculate_context_block_tokens()
|
||||||
|
|
||||||
result = "<context name=\"context_summary\">\n"
|
result = "<context name=\"context_summary\">\n"
|
||||||
result += "## Current Context Overview\n\n"
|
result += "## Current Context Overview\n\n"
|
||||||
max_input_tokens = self.main_model.info.get("max_input_tokens") or 0
|
max_input_tokens = self.main_model.info.get("max_input_tokens") or 0
|
||||||
|
@ -401,27 +515,8 @@ class NavigatorCoder(Coder):
|
||||||
else:
|
else:
|
||||||
result += "No read-only files in context\n\n"
|
result += "No read-only files in context\n\n"
|
||||||
|
|
||||||
# Additional enhanced context blocks
|
# Use the pre-calculated context block tokens
|
||||||
env_info = self.get_environment_info()
|
extra_tokens = sum(self.context_block_tokens.values())
|
||||||
dir_structure = self.get_directory_structure()
|
|
||||||
git_status = self.get_git_status()
|
|
||||||
symbol_outline = self.get_context_symbol_outline()
|
|
||||||
|
|
||||||
extra_context = ""
|
|
||||||
extra_tokens = 0
|
|
||||||
if env_info:
|
|
||||||
extra_context += env_info + "\n\n"
|
|
||||||
extra_tokens += self.main_model.token_count(env_info)
|
|
||||||
if dir_structure:
|
|
||||||
extra_context += dir_structure + "\n\n"
|
|
||||||
extra_tokens += self.main_model.token_count(dir_structure)
|
|
||||||
if git_status:
|
|
||||||
extra_context += git_status + "\n\n"
|
|
||||||
extra_tokens += self.main_model.token_count(git_status)
|
|
||||||
if symbol_outline:
|
|
||||||
extra_context += symbol_outline + "\n\n"
|
|
||||||
extra_tokens += self.main_model.token_count(symbol_outline)
|
|
||||||
|
|
||||||
total_tokens = total_file_tokens + extra_tokens
|
total_tokens = total_file_tokens + extra_tokens
|
||||||
|
|
||||||
result += f"**Total files usage: {total_file_tokens:,} tokens**\n\n"
|
result += f"**Total files usage: {total_file_tokens:,} tokens**\n\n"
|
||||||
|
@ -435,6 +530,12 @@ class NavigatorCoder(Coder):
|
||||||
result += "- `[tool_call(Remove, file_path=\"path/to/large_file.ext\")]`\n"
|
result += "- `[tool_call(Remove, file_path=\"path/to/large_file.ext\")]`\n"
|
||||||
result += "- Keep only essential files in context for best performance"
|
result += "- Keep only essential files in context for best performance"
|
||||||
result += "\n</context>"
|
result += "\n</context>"
|
||||||
|
|
||||||
|
# Cache the result
|
||||||
|
if not hasattr(self, 'context_blocks_cache'):
|
||||||
|
self.context_blocks_cache = {}
|
||||||
|
self.context_blocks_cache["context_summary"] = result
|
||||||
|
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.io.tool_error(f"Error generating context summary: {str(e)}")
|
self.io.tool_error(f"Error generating context summary: {str(e)}")
|
||||||
|
@ -1647,8 +1748,15 @@ Just reply with fixed versions of the {blocks} above that failed to match.
|
||||||
|
|
||||||
if self.use_enhanced_context:
|
if self.use_enhanced_context:
|
||||||
self.io.tool_output("Enhanced context blocks are now ON - directory structure and git status will be included.")
|
self.io.tool_output("Enhanced context blocks are now ON - directory structure and git status will be included.")
|
||||||
|
# Mark tokens as needing calculation, but don't calculate yet (lazy calculation)
|
||||||
|
self.tokens_calculated = False
|
||||||
|
self.context_blocks_cache = {}
|
||||||
else:
|
else:
|
||||||
self.io.tool_output("Enhanced context blocks are now OFF - directory structure and git status will not be included.")
|
self.io.tool_output("Enhanced context blocks are now OFF - directory structure and git status will not be included.")
|
||||||
|
# Clear token counts and cache when disabled
|
||||||
|
self.context_block_tokens = {}
|
||||||
|
self.context_blocks_cache = {}
|
||||||
|
self.tokens_calculated = False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
|
@ -464,6 +464,20 @@ class Commands:
|
||||||
tokens = self.coder.main_model.token_count(repo_content)
|
tokens = self.coder.main_model.token_count(repo_content)
|
||||||
res.append((tokens, "repository map", "use --map-tokens to resize"))
|
res.append((tokens, "repository map", "use --map-tokens to resize"))
|
||||||
|
|
||||||
|
# Enhanced context blocks (only for navigator mode)
|
||||||
|
if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
|
||||||
|
# Force token calculation if it hasn't been done yet
|
||||||
|
if hasattr(self.coder, '_calculate_context_block_tokens'):
|
||||||
|
if not hasattr(self.coder, 'tokens_calculated') or not self.coder.tokens_calculated:
|
||||||
|
self.coder._calculate_context_block_tokens()
|
||||||
|
|
||||||
|
# Add enhanced context blocks to the display
|
||||||
|
if hasattr(self.coder, 'context_block_tokens') and self.coder.context_block_tokens:
|
||||||
|
for block_name, tokens in self.coder.context_block_tokens.items():
|
||||||
|
# Format the block name more nicely
|
||||||
|
display_name = block_name.replace('_', ' ').title()
|
||||||
|
res.append((tokens, f"{display_name} context block", "/context-blocks to toggle"))
|
||||||
|
|
||||||
fence = "`" * 3
|
fence = "`" * 3
|
||||||
|
|
||||||
file_res = []
|
file_res = []
|
||||||
|
@ -873,6 +887,11 @@ class Commands:
|
||||||
self.io.tool_output(f"Added {fname} to the chat")
|
self.io.tool_output(f"Added {fname} to the chat")
|
||||||
self.coder.check_added_files()
|
self.coder.check_added_files()
|
||||||
|
|
||||||
|
# Recalculate context block tokens if using navigator mode
|
||||||
|
if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
|
||||||
|
if hasattr(self.coder, '_calculate_context_block_tokens'):
|
||||||
|
self.coder._calculate_context_block_tokens()
|
||||||
|
|
||||||
def completions_drop(self):
|
def completions_drop(self):
|
||||||
files = self.coder.get_inchat_relative_files()
|
files = self.coder.get_inchat_relative_files()
|
||||||
read_only_files = [self.coder.get_rel_fname(fn) for fn in self.coder.abs_read_only_fnames]
|
read_only_files = [self.coder.get_rel_fname(fn) for fn in self.coder.abs_read_only_fnames]
|
||||||
|
@ -891,9 +910,16 @@ class Commands:
|
||||||
else:
|
else:
|
||||||
self.io.tool_output("Dropping all files from the chat session.")
|
self.io.tool_output("Dropping all files from the chat session.")
|
||||||
self._drop_all_files()
|
self._drop_all_files()
|
||||||
|
|
||||||
|
# Recalculate context block tokens after dropping all files
|
||||||
|
if hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
|
||||||
|
if hasattr(self.coder, '_calculate_context_block_tokens'):
|
||||||
|
self.coder._calculate_context_block_tokens()
|
||||||
return
|
return
|
||||||
|
|
||||||
filenames = parse_quoted_filenames(args)
|
filenames = parse_quoted_filenames(args)
|
||||||
|
files_changed = False
|
||||||
|
|
||||||
for word in filenames:
|
for word in filenames:
|
||||||
# Expand tilde in the path
|
# Expand tilde in the path
|
||||||
expanded_word = os.path.expanduser(word)
|
expanded_word = os.path.expanduser(word)
|
||||||
|
@ -916,6 +942,7 @@ class Commands:
|
||||||
for matched_file in read_only_matched:
|
for matched_file in read_only_matched:
|
||||||
self.coder.abs_read_only_fnames.remove(matched_file)
|
self.coder.abs_read_only_fnames.remove(matched_file)
|
||||||
self.io.tool_output(f"Removed read-only file {matched_file} from the chat")
|
self.io.tool_output(f"Removed read-only file {matched_file} from the chat")
|
||||||
|
files_changed = True
|
||||||
|
|
||||||
# For editable files, use glob if word contains glob chars, otherwise use substring
|
# For editable files, use glob if word contains glob chars, otherwise use substring
|
||||||
if any(c in expanded_word for c in "*?[]"):
|
if any(c in expanded_word for c in "*?[]"):
|
||||||
|
@ -934,6 +961,12 @@ class Commands:
|
||||||
if abs_fname in self.coder.abs_fnames:
|
if abs_fname in self.coder.abs_fnames:
|
||||||
self.coder.abs_fnames.remove(abs_fname)
|
self.coder.abs_fnames.remove(abs_fname)
|
||||||
self.io.tool_output(f"Removed {matched_file} from the chat")
|
self.io.tool_output(f"Removed {matched_file} from the chat")
|
||||||
|
files_changed = True
|
||||||
|
|
||||||
|
# Recalculate context block tokens if any files were changed and using navigator mode
|
||||||
|
if files_changed and hasattr(self.coder, 'use_enhanced_context') and self.coder.use_enhanced_context:
|
||||||
|
if hasattr(self.coder, '_calculate_context_block_tokens'):
|
||||||
|
self.coder._calculate_context_block_tokens()
|
||||||
|
|
||||||
def cmd_git(self, args):
|
def cmd_git(self, args):
|
||||||
"Run a git command (output excluded from chat)"
|
"Run a git command (output excluded from chat)"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue