Try for more cache-friendly prompt ordering

This commit is contained in:
Amar Sood (tekacs) 2025-04-13 15:42:09 -04:00
parent 930880151e
commit 02c092afff

View file

@@ -15,7 +15,7 @@ from xml.etree.ElementTree import ParseError
# Add necessary imports if not already present # Add necessary imports if not already present
from collections import defaultdict from collections import defaultdict
from .base_coder import Coder from .base_coder import Coder, ChatChunks
from .editblock_coder import find_original_update_blocks, do_replace, find_similar_lines from .editblock_coder import find_original_update_blocks, do_replace, find_similar_lines
from .navigator_prompts import NavigatorPrompts from .navigator_prompts import NavigatorPrompts
from .navigator_legacy_prompts import NavigatorLegacyPrompts from .navigator_legacy_prompts import NavigatorLegacyPrompts
@@ -183,63 +183,164 @@ class NavigatorCoder(Coder):
Override parent's format_chat_chunks to include enhanced context blocks with a Override parent's format_chat_chunks to include enhanced context blocks with a
cleaner, more hierarchical structure for better organization. cleaner, more hierarchical structure for better organization.
Optimized for prompt caching: enhanced context blocks are inserted after static Optimized for prompt caching by placing context blocks strategically:
chat elements (system, examples, repo, readonly_files, done) but before variable 1. Relatively static blocks (directory structure, environment info) before done_messages
elements (chat_files, cur, reminder) to preserve prefix caching while providing 2. Dynamic blocks (context summary, symbol outline, git status) after chat_files
fresh context information.
This approach preserves prefix caching while providing fresh context information.
""" """
# First get the normal chat chunks from the parent method # First get the normal chat chunks from the parent method without calling super
chunks = super().format_chat_chunks() # Calls BaseCoder's format_chat_chunks # We'll manually build the chunks to control placement of context blocks
chunks = self.format_chat_chunks_base()
# If enhanced context blocks are enabled, insert them in a strategic position # If enhanced context blocks are not enabled, just return the base chunks
if self.use_enhanced_context: if not self.use_enhanced_context:
# Create environment info context block return chunks
# Generate all context blocks
env_context = self.get_environment_info() env_context = self.get_environment_info()
# Get current context summary
context_summary = self.get_context_summary() context_summary = self.get_context_summary()
# Get directory structure
dir_structure = self.get_directory_structure() dir_structure = self.get_directory_structure()
# Get git status
git_status = self.get_git_status() git_status = self.get_git_status()
# Get symbol outline for current context files
symbol_outline = self.get_context_symbol_outline() symbol_outline = self.get_context_symbol_outline()
# Collect all context blocks that exist # 1. Add relatively static blocks BEFORE done_messages
context_blocks = [] # These blocks change less frequently and can be part of the cacheable prefix
if env_context: static_blocks = []
context_blocks.append(env_context)
if context_summary:
context_blocks.append(context_summary)
if dir_structure: if dir_structure:
context_blocks.append(dir_structure) static_blocks.append(dir_structure)
if git_status: if env_context:
context_blocks.append(git_status) static_blocks.append(env_context)
if symbol_outline: # Add the new block if it was generated
context_blocks.append(symbol_outline)
# Insert a fresh context update as a separate message before current messages if static_blocks:
# This preserves cacheable prefix portions (system, examples, repo, etc.) static_message = "\n\n".join(static_blocks)
# while still providing fresh context information # Insert as a system message right before done_messages
if context_blocks and chunks.cur: chunks.done.insert(0, dict(role="system", content=static_message))
context_message = "\n\n".join(context_blocks)
# Insert fresh context as a system message right before the first user message in cur # 2. Add dynamic blocks AFTER chat_files
for i, msg in enumerate(chunks.cur): # These blocks change with the current files in context
if msg["role"] == "user": dynamic_blocks = []
# Insert context message right before the first user message if context_summary:
chunks.cur.insert(i, dict(role="system", content=context_message)) dynamic_blocks.append(context_summary)
break if symbol_outline:
dynamic_blocks.append(symbol_outline)
if git_status:
dynamic_blocks.append(git_status)
if dynamic_blocks:
dynamic_message = "\n\n".join(dynamic_blocks)
# Append as a system message after chat_files
chunks.chat_files.append(dict(role="system", content=dynamic_message))
return chunks
def format_chat_chunks_base(self):
    """
    Create base chat chunks without enhanced context blocks.

    This is a copy of the parent's format_chat_chunks method to avoid
    calling super() which would create a recursive loop (this class
    overrides format_chat_chunks, and that override calls this method).

    Returns:
        ChatChunks: the assembled message chunks (system, examples, done,
        repo, readonly_files, chat_files, cur, reminder).
    """
    self.choose_fence()
    main_sys = self.fmt_system_prompt(self.gpt_prompts.main_system)

    example_messages = []
    if self.main_model.examples_as_sys_msg:
        # Fold the example conversations directly into the system prompt.
        if self.gpt_prompts.example_messages:
            main_sys += "\n# Example conversations:\n\n"
        for msg in self.gpt_prompts.example_messages:
            role = msg["role"]
            content = self.fmt_system_prompt(msg["content"])
            main_sys += f"## {role.upper()}: {content}\n\n"
        main_sys = main_sys.strip()
    else:
        # Send examples as real chat messages, then reset the model's
        # attention to the current code base.
        for msg in self.gpt_prompts.example_messages:
            example_messages.append(
                dict(
                    role=msg["role"],
                    content=self.fmt_system_prompt(msg["content"]),
                )
            )
        if self.gpt_prompts.example_messages:
            example_messages += [
                dict(
                    role="user",
                    content=(
                        "I switched to a new code base. Please don't consider the above files"
                        " or try to edit them any longer."
                    ),
                ),
                dict(role="assistant", content="Ok."),
            ]

    if self.gpt_prompts.system_reminder:
        main_sys += "\n" + self.fmt_system_prompt(self.gpt_prompts.system_reminder)

    chunks = ChatChunks()

    if self.main_model.use_system_prompt:
        chunks.system = [
            dict(role="system", content=main_sys),
        ]
    else:
        # Models without a system role get the prompt as a user/assistant pair.
        chunks.system = [
            dict(role="user", content=main_sys),
            dict(role="assistant", content="Ok."),
        ]

    chunks.examples = example_messages

    self.summarize_end()
    chunks.done = self.done_messages

    chunks.repo = self.get_repo_messages()
    chunks.readonly_files = self.get_readonly_files_messages()
    chunks.chat_files = self.get_chat_files_messages()

    if self.gpt_prompts.system_reminder:
        reminder_message = [
            dict(
                role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)
            ),
        ]
    else:
        reminder_message = []

    chunks.cur = list(self.cur_messages)
    chunks.reminder = []

    # TODO review impact of token count on image messages
    messages_tokens = self.main_model.token_count(chunks.all_messages())
    reminder_tokens = self.main_model.token_count(reminder_message)
    cur_tokens = self.main_model.token_count(chunks.cur)

    if None not in (messages_tokens, reminder_tokens, cur_tokens):
        total_tokens = messages_tokens + reminder_tokens + cur_tokens
    else:
        # Token counting failed for some chunk; add the reminder anyway.
        total_tokens = 0

    if chunks.cur:
        final = chunks.cur[-1]
    else:
        final = None

    max_input_tokens = self.main_model.info.get("max_input_tokens") or 0
    # Add the reminder prompt if we still have room to include it.
    # NOTE(review): `and` binds tighter than `or`, so this parses as
    # `not max_input_tokens or (total_tokens < max_input_tokens and ...)`.
    # Kept as-is to match upstream behavior — confirm the grouping is intended.
    if (
        not max_input_tokens
        or total_tokens < max_input_tokens
        and self.gpt_prompts.system_reminder
    ):
        if self.main_model.reminder == "sys":
            chunks.reminder = reminder_message
        elif self.main_model.reminder == "user" and final and final["role"] == "user":
            # stuff it into the user message
            new_content = (
                final["content"]
                + "\n\n"
                + self.fmt_system_prompt(self.gpt_prompts.system_reminder)
            )
            chunks.cur[-1] = dict(role=final["role"], content=new_content)

    return chunks
@ -865,14 +966,16 @@ class NavigatorCoder(Coder):
content = params.get('content') content = params.get('content')
after_pattern = params.get('after_pattern') after_pattern = params.get('after_pattern')
before_pattern = params.get('before_pattern') before_pattern = params.get('before_pattern')
near_context = params.get('near_context') # New occurrence = params.get('occurrence', 1) # Default 1
occurrence = params.get('occurrence', 1) # New, default 1
change_id = params.get('change_id') change_id = params.get('change_id')
dry_run = params.get('dry_run', False) # New, default False dry_run = params.get('dry_run', False) # Default False
position = params.get('position')
auto_indent = params.get('auto_indent', True) # Default True
use_regex = params.get('use_regex', False) # Default False
if file_path is not None and content is not None and (after_pattern is not None or before_pattern is not None): if file_path is not None and content is not None and (after_pattern is not None or before_pattern is not None or position is not None):
result_message = _execute_insert_block( result_message = _execute_insert_block(
self, file_path, content, after_pattern, before_pattern, near_context, occurrence, change_id, dry_run self, file_path, content, after_pattern, before_pattern, occurrence, change_id, dry_run, position, auto_indent, use_regex
) )
else: else:
result_message = "Error: Missing required parameters for InsertBlock (file_path, content, and either after_pattern or before_pattern)" result_message = "Error: Missing required parameters for InsertBlock (file_path, content, and either after_pattern or before_pattern)"