mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-01 02:05:00 +00:00
Estimate tokenization to speed up repo map
This commit is contained in:
parent
de26f8e5e0
commit
75c3c40354
1 changed file with 22 additions and 3 deletions
|
@ -29,9 +29,10 @@ def print_elapsed(message):
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
if hasattr(print_elapsed, "last_time"):
|
if hasattr(print_elapsed, "last_time"):
|
||||||
elapsed = current_time - print_elapsed.last_time
|
elapsed = current_time - print_elapsed.last_time
|
||||||
print(f"{message}: {elapsed:.2f} seconds")
|
print(f"... {elapsed:.2f} seconds")
|
||||||
|
print(message)
|
||||||
else:
|
else:
|
||||||
print(f"{message}: (first measurement)")
|
print(f"{message}:")
|
||||||
print_elapsed.last_time = current_time
|
print_elapsed.last_time = current_time
|
||||||
|
|
||||||
|
|
||||||
|
@ -43,6 +44,8 @@ class RepoMap:
|
||||||
|
|
||||||
warned_files = set()
|
warned_files = set()
|
||||||
|
|
||||||
|
tokens_per_char = None
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
map_tokens=1024,
|
map_tokens=1024,
|
||||||
|
@ -67,9 +70,22 @@ class RepoMap:
|
||||||
self.map_mul_no_files = map_mul_no_files
|
self.map_mul_no_files = map_mul_no_files
|
||||||
self.max_context_window = max_context_window
|
self.max_context_window = max_context_window
|
||||||
|
|
||||||
self.token_count = main_model.token_count
|
|
||||||
self.repo_content_prefix = repo_content_prefix
|
self.repo_content_prefix = repo_content_prefix
|
||||||
|
|
||||||
|
self.main_model = main_model
|
||||||
|
|
||||||
|
def token_count(self, text):
    """Estimate the number of tokens in ``text`` without tokenizing all of it.

    The first call after ``self.tokens_per_char`` has been cleared tokenizes
    a sample of up to 150 lines of ``text`` with
    ``self.main_model.token_count`` and caches the resulting
    tokens-per-character ratio on ``self.tokens_per_char``. Subsequent calls
    reuse that ratio, so they cost only a ``len()``.

    Args:
        text: The string whose token count should be estimated.

    Returns:
        The estimated token count (``len(text) * tokens_per_char``), or 0
        for empty text.
    """
    if not text:
        # Nothing to tokenize; also avoids dividing by a zero-length sample.
        return 0

    if not self.tokens_per_char:
        lines = text.splitlines(keepends=True)
        # random.sample raises ValueError when asked for more items than the
        # population holds, so only sample when there are enough lines;
        # otherwise the whole text is the sample.
        if len(lines) > 150:
            lines = random.sample(lines, 150)
        sample_text = "".join(lines)

        tokens = self.main_model.token_count(sample_text)
        self.tokens_per_char = tokens / len(sample_text)

    # The ratio is tokens per character, so scale it up by the text length.
    return len(text) * self.tokens_per_char
|
||||||
|
|
||||||
def get_repo_map(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
|
def get_repo_map(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
|
||||||
if self.max_map_tokens <= 0:
|
if self.max_map_tokens <= 0:
|
||||||
return
|
return
|
||||||
|
@ -80,6 +96,9 @@ class RepoMap:
|
||||||
if not mentioned_idents:
|
if not mentioned_idents:
|
||||||
mentioned_idents = set()
|
mentioned_idents = set()
|
||||||
|
|
||||||
|
# reset the estimate
|
||||||
|
self.tokens_per_char = None
|
||||||
|
|
||||||
max_map_tokens = self.max_map_tokens
|
max_map_tokens = self.max_map_tokens
|
||||||
|
|
||||||
# With no files in the chat, give a bigger view of the entire repo
|
# With no files in the chat, give a bigger view of the entire repo
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue