From d89eeff13d713f1490dae236e7e4db9e3620629e Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Mon, 5 Aug 2024 20:42:36 -0300
Subject: [PATCH] fix: Implement efficient token counting for large text inputs

---
Review notes (ignored by `git am`): line breaks restored; docstring and
comments added to `token_count`; the sampled estimate is now rounded to an
int. Hunk counts and diffstat are updated to match; the commit id above and
the blob ids on the `index` line are unchanged from the original patch.

 aider/repomap.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/aider/repomap.py b/aider/repomap.py
index 231d319af..5f9bfd398 100644
--- a/aider/repomap.py
+++ b/aider/repomap.py
@@ -61,7 +61,34 @@ class RepoMap:
 
         self.repo_content_prefix = repo_content_prefix
 
-        self.token_count = main_model.token_count
+        self.main_model = main_model
+
+    def token_count(self, text):
+        """Count the tokens in `text`, sampling lines when it is long.
+
+        Text shorter than 200 characters is passed straight to
+        `self.main_model.token_count`. Longer text is split into lines,
+        every `step`-th line is kept (fewer than 200 lines in total), and
+        the sample's tokens-per-character ratio is scaled to the full
+        length of `text`.
+
+        The estimate is rounded to the nearest int; the true division used
+        to compute it would otherwise return a float.
+        """
+        len_text = len(text)
+        if len_text < 200:
+            return self.main_model.token_count(text)
+
+        lines = text.splitlines(keepends=True)
+        # `or 1` keeps every line when there are fewer than 100; a slice
+        # step of 0 would raise ValueError.
+        step = len(lines) // 100 or 1
+        sample_text = "".join(lines[::step])
+        sample_tokens = self.main_model.token_count(sample_text)
+        # The slice always keeps the first line, so sample_text is never
+        # empty and the division is safe.
+        return round(sample_tokens / len(sample_text) * len_text)
 
     def get_repo_map(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
         if self.max_map_tokens <= 0: