From d89eeff13d713f1490dae236e7e4db9e3620629e Mon Sep 17 00:00:00 2001
From: Paul Gauthier <aider@paulg.org>
Date: Mon, 5 Aug 2024 20:42:36 -0300
Subject: [PATCH] fix: Implement efficient token counting for large text inputs

---
 aider/repomap.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/aider/repomap.py b/aider/repomap.py
index 231d319af..5f9bfd398 100644
--- a/aider/repomap.py
+++ b/aider/repomap.py
@@ -61,7 +61,21 @@ class RepoMap:
 
         self.repo_content_prefix = repo_content_prefix
 
-        self.token_count = main_model.token_count
+        self.main_model = main_model
+
+    def token_count(self, text):
+        """Return the model's token count for short text, or a sampled estimate for long text."""
+        total_chars = len(text)
+        if total_chars < 200:
+            return self.main_model.token_count(text)
+
+        # Tokenize only every stride-th line; stride is the line count // 100, but at least 1.
+        all_lines = text.splitlines(keepends=True)
+        stride = max(len(all_lines) // 100, 1)
+        sample = "".join(all_lines[::stride])
+        sample_tokens = self.main_model.token_count(sample)
+        # Scale the sample's tokens-per-character ratio up to the full text length.
+        return sample_tokens / len(sample) * total_chars
 
     def get_repo_map(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
         if self.max_map_tokens <= 0: