refactor: Update context handling and repo map logic

This commit is contained in:
Paul Gauthier 2025-03-22 16:19:39 -07:00
parent a3377686fa
commit b591b64d3f
5 changed files with 52 additions and 16 deletions

View file

@ -8,6 +8,16 @@ class ContextCoder(Coder):
edit_format = "context"
gpt_prompts = ContextPrompts()
def __init__(self, *args, **kwargs):
    """Initialize the coder, then retune its repo map when one is present.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer. If ``self.repo_map`` is falsy afterwards, nothing
    else is done.
    """
    super().__init__(*args, **kwargs)

    repo_map = self.repo_map
    if repo_map:
        # Switch the map's refresh setting to "always".
        repo_map.refresh = "always"
        # Fold the no-files multiplier into the token budget once, then
        # neutralize it (presumably so it is not applied a second time
        # elsewhere -- confirm against RepoMap).
        repo_map.max_map_tokens *= repo_map.map_mul_no_files
        repo_map.map_mul_no_files = 1.0
def reply_completed(self):
content = self.partial_response_content
if not content or not content.strip():

View file

@ -5,19 +5,41 @@ from .base_prompts import CoderPrompts
class ContextPrompts(CoderPrompts):
# System prompt: tells the model to act as a code analyst and to answer with
# the existing files that must be modified plus the relevant symbols that live
# outside those files, following the example layout at the end of the text.
# `{language}` is a format placeholder (presumably filled in by the caller
# before the prompt is sent -- confirm against the base prompt handling).
main_system = """Act as an expert code analyst.
Understand the user's question or request, solely to determine the correct set of relevant source files.
Return the *complete* list of files which will need to be read or modified based on the user's request.
Understand the user's question or request, solely to determine ALL the existing source files which will need to be modified.
Return the *complete* list of files which will need to be modified based on the user's request.
Explain why each file is needed, including names of key classes/functions/methods/variables.
Be sure to include or omit the names of files already added to the chat, based on whether they are actually needed or not.
Be selective!
Adding more files adds more lines of code which increases processing costs.
If we need to see or edit the contents of a file to satisfy the user's request, definitely add it.
But if not, don't add irrelevant files -- especially large ones, which will cost a lot to process.
The user will use every file you mention, regardless of your commentary.
So *ONLY* mention the names of relevant files.
If a file is not relevant DO NOT mention it.
Only return files that will need to be modified, not files that contain useful/relevant functions.
You are only to discuss EXISTING files and symbols.
Only return existing files, don't suggest the names of new files we will need to create.
Always reply to the user in {language}.
Return a simple bulleted list:
Be concise in your replies.
Return:
1. A bulleted list of files that will need to be edited, and symbols that are highly relevant to the user's request.
2. A list of classes/functions/methods/variables that are located OUTSIDE those files which will need to be understood. Just the symbol names, *NOT* file names.
Here is an example response, use this format:
## Files to modify, with their relevant symbols:
- alarms/buzz.py
- `Buzzer` class which can make the needed sound
- `Buzzer.buzz_buzz()` method triggers the sound
- alarms/time.py
- `Time.set_alarm(hour, minute)` to set the alarm
## Relevant symbols from OTHER files:
- AlarmManager class for setup/teardown of alarms
- SoundFactory will be used to create a Buzzer
"""
example_messages = []
@ -46,6 +68,8 @@ NEVER RETURN CODE!
"""
# Follow-up prompt: states that the set of files in the chat has been updated
# and asks the model either to return the current list unchanged or to return
# a smaller/larger list of files to edit together with their relevant symbols.
# NOTE(review): when this prompt is sent is not visible here -- confirm
# against the caller.
try_again = """I have updated the set of files added to the chat.
Review them to decide if this is the correct set of files or if we need to add more.
Review them to decide if this is the correct set of files or if we need to add more or remove files.
If this is the right set, just return the current list of files.
Or return a smaller or larger set of files which need to be edited, with symbols that are highly relevant to the user's request.
"""

View file

@ -149,7 +149,7 @@ class Commands:
),
(
"context",
"Work with surrounding code context for more contextually-aware edits.",
"Automatically identify which files will need to be edited.",
),
]
)

View file

@ -748,7 +748,6 @@ class Model(ModelSettings):
kwargs = dict(
model=self.name,
messages=messages,
stream=stream,
)
@ -779,6 +778,8 @@ class Model(ModelSettings):
kwargs["timeout"] = request_timeout
if self.verbose:
dump(kwargs)
kwargs["messages"] = messages
res = litellm.completion(**kwargs)
return hash_object, res

View file

@ -447,16 +447,17 @@ class RepoMap:
definers = defines[ident]
mul = 1.0
# Check for snake_case (contains underscore, no uppercase)
if "_" in ident and not any(c.isupper() for c in ident):
mul *= 10
# Check for camelCase (no underscore, starts with lowercase, has uppercase)
elif not "_" in ident and ident[0].islower() and any(c.isupper() for c in ident):
mul *= 10
is_snake = ("_" in ident) and any(c.isalpha() for c in ident)
is_camel = any(c.isupper() for c in ident) and any(c.islower() for c in ident)
if ident in mentioned_idents:
mul *= 10
if (is_snake or is_camel) and len(ident) >= 8:
mul *= 10
if ident.startswith("_"):
mul *= 0.1
if len(defines[ident]) > 5:
mul *= 0.1
for referencer, num_refs in Counter(references[ident]).items():
for definer in definers: