mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 16:54:59 +00:00
cleanup ranked_tags_map cache and token estimator
This commit is contained in:
parent
16defb3c8c
commit
95e603350c
2 changed files with 8 additions and 52 deletions
|
@ -3,13 +3,13 @@
|
||||||
|
|
||||||
### main branch
|
### main branch
|
||||||
|
|
||||||
|
- Performance improvements for large/mono repos.
|
||||||
- Added `--subtree-only` to limit aider to current directory subtree.
|
- Added `--subtree-only` to limit aider to current directory subtree.
|
||||||
- Should help with large/mono repo performance.
|
- Should help with large/mono repo performance.
|
||||||
- New `/add-clipboard-image` to add images to the chat from your clipboard.
|
- New `/add-clipboard-image` to add images to the chat from your clipboard.
|
||||||
- Use `--map-tokens 1024` to use repo map with any model.
|
- Use `--map-tokens 1024` to use repo map with any model.
|
||||||
- Support for Sonnet's 8k output window.
|
- Support for Sonnet's 8k output window.
|
||||||
- [Aider already supported infinite output from Sonnet.](https://aider.chat/2024/07/01/sonnet-not-lazy.html)
|
- [Aider already supported infinite output from Sonnet.](https://aider.chat/2024/07/01/sonnet-not-lazy.html)
|
||||||
- Performance improvements for large repos.
|
|
||||||
- Workaround litellm bug for retrying API server errors.
|
- Workaround litellm bug for retrying API server errors.
|
||||||
- Upgraded dependencies, to pick up litellm bug fixes.
|
- Upgraded dependencies, to pick up litellm bug fixes.
|
||||||
- Aider wrote 48% of the code since the last release.
|
- Aider wrote 48% of the code since the last release.
|
||||||
|
|
|
@ -34,10 +34,6 @@ class RepoMap:
|
||||||
|
|
||||||
tokens_per_char = None
|
tokens_per_char = None
|
||||||
|
|
||||||
# Cache for get_ranked_tags_map
|
|
||||||
_last_ranked_tags_map = None
|
|
||||||
_last_ranked_tags_map_args = None
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
map_tokens=1024,
|
map_tokens=1024,
|
||||||
|
@ -64,23 +60,7 @@ class RepoMap:
|
||||||
|
|
||||||
self.repo_content_prefix = repo_content_prefix
|
self.repo_content_prefix = repo_content_prefix
|
||||||
|
|
||||||
self.main_model = main_model
|
self.token_count = main_model.token_count
|
||||||
|
|
||||||
def token_count(self, text):
|
|
||||||
if self.tokens_per_char:
|
|
||||||
return len(text) / self.tokens_per_char
|
|
||||||
|
|
||||||
sample_text = text.splitlines(keepends=True)
|
|
||||||
samples = 300
|
|
||||||
if len(sample_text) < samples:
|
|
||||||
return self.main_model.token_count(text)
|
|
||||||
|
|
||||||
sample_text = "".join(random.sample(sample_text, samples))
|
|
||||||
tokens = self.main_model.token_count(sample_text)
|
|
||||||
self.tokens_per_char = tokens / len(sample_text)
|
|
||||||
|
|
||||||
return len(text) / self.tokens_per_char
|
|
||||||
return tokens
|
|
||||||
|
|
||||||
def get_repo_map(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
|
def get_repo_map(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
|
||||||
if self.max_map_tokens <= 0:
|
if self.max_map_tokens <= 0:
|
||||||
|
@ -413,18 +393,6 @@ class RepoMap:
|
||||||
mentioned_fnames=None,
|
mentioned_fnames=None,
|
||||||
mentioned_idents=None,
|
mentioned_idents=None,
|
||||||
):
|
):
|
||||||
# Check if the arguments match the last call
|
|
||||||
current_args = (
|
|
||||||
tuple(sorted(chat_fnames)),
|
|
||||||
tuple(sorted(other_fnames)) if other_fnames else None,
|
|
||||||
max_map_tokens,
|
|
||||||
frozenset(sorted(mentioned_fnames)) if mentioned_fnames else None,
|
|
||||||
frozenset(sorted(mentioned_idents)) if mentioned_idents else None,
|
|
||||||
)
|
|
||||||
|
|
||||||
if current_args == self._last_ranked_tags_map_args:
|
|
||||||
return self._last_ranked_tags_map
|
|
||||||
|
|
||||||
if not other_fnames:
|
if not other_fnames:
|
||||||
other_fnames = list()
|
other_fnames = list()
|
||||||
if not max_map_tokens:
|
if not max_map_tokens:
|
||||||
|
@ -456,29 +424,22 @@ class RepoMap:
|
||||||
|
|
||||||
self.tree_cache = dict()
|
self.tree_cache = dict()
|
||||||
|
|
||||||
# Estimate initial middle value
|
middle = min(max_map_tokens // 25, num_tags)
|
||||||
sample_size = min(100, num_tags)
|
|
||||||
sample_tree = self.to_tree(ranked_tags[:sample_size], chat_rel_fnames)
|
|
||||||
sample_tokens = self.token_count(sample_tree)
|
|
||||||
|
|
||||||
if sample_tokens > 0:
|
|
||||||
estimated_tags = int((max_map_tokens / sample_tokens) * sample_size * 1.5)
|
|
||||||
middle = min(estimated_tags, num_tags)
|
|
||||||
else:
|
|
||||||
middle = min(max_map_tokens // 50, num_tags)
|
|
||||||
|
|
||||||
while lower_bound <= upper_bound:
|
while lower_bound <= upper_bound:
|
||||||
|
# dump(lower_bound, middle, upper_bound)
|
||||||
|
|
||||||
spin.step()
|
spin.step()
|
||||||
|
|
||||||
tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
|
tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
|
||||||
num_tokens = self.token_count(tree)
|
num_tokens = self.token_count(tree)
|
||||||
|
|
||||||
pct_err = abs(num_tokens - max_map_tokens) / max_map_tokens
|
pct_err = abs(num_tokens - max_map_tokens) / max_map_tokens
|
||||||
if (num_tokens <= max_map_tokens and num_tokens > best_tree_tokens) or pct_err < 0.1:
|
ok_err = 0.15
|
||||||
|
if (num_tokens <= max_map_tokens and num_tokens > best_tree_tokens) or pct_err < ok_err:
|
||||||
best_tree = tree
|
best_tree = tree
|
||||||
best_tree_tokens = num_tokens
|
best_tree_tokens = num_tokens
|
||||||
|
|
||||||
if pct_err < 0.1:
|
if pct_err < ok_err:
|
||||||
break
|
break
|
||||||
|
|
||||||
if num_tokens < max_map_tokens:
|
if num_tokens < max_map_tokens:
|
||||||
|
@ -489,11 +450,6 @@ class RepoMap:
|
||||||
middle = (lower_bound + upper_bound) // 2
|
middle = (lower_bound + upper_bound) // 2
|
||||||
|
|
||||||
spin.end()
|
spin.end()
|
||||||
|
|
||||||
# Cache the result
|
|
||||||
self._last_ranked_tags_map = best_tree
|
|
||||||
self._last_ranked_tags_map_args = current_args
|
|
||||||
|
|
||||||
return best_tree
|
return best_tree
|
||||||
|
|
||||||
tree_cache = dict()
|
tree_cache = dict()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue