From 777a36d3662604e8939bc59ceea7ceac9d700551 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Fri, 2 Jun 2023 12:46:03 -0700 Subject: [PATCH] aider: Modified get_ranked_tags_map to use binary search. # Context: USER: make `get_ranked_tags_map` use binary search ASSISTANT: To modify the `get_ranked_tags_map` function to use binary search, follow these steps: 1. Replace the for loop with a binary search implementation. 2. Update the loop to calculate the middle index and generate the tree with the middle index. 3. Check if the number of tokens is less than `max_map_tokens`. If it is, update the lower bound. Otherwise, update the upper bound. 4. Continue the binary search until the lower bound is greater than the upper bound. Here's the updated `get_ranked_tags_map` function: ```python aider/repomap.py <<<<<<< ORIGINAL def get_ranked_tags_map(self, fnames): # 1/4 of gpt-4's context window max_map_tokens = 2048 ranked_tags = self.get_ranked_tags(fnames) num_tags = len(ranked_tags) for i in range(num_tags, 0, -1): tree = to_tree(ranked_tags[:i]) num_tokens = self.token_count(tree) dump(i, num_tokens) if num_tokens < max_map_tokens: return tree ======= def get_ranked_tags_map(self, fnames): # 1/4 of gpt-4's context window max_map_tokens = 2048 ranked_tags = self.get_ranked_tags(fnames) num_tags = len(ranked_tags) lower_bound = 0 upper_bound = num_tags best_tree = None while lower_bound <= upper_bound: middle = (lower_bound + upper_bound) // 2 tree = to_tree(ranked_tags[:middle]) num_tokens = self.token_count(tree) if num_tokens < max_map_tokens: best_tree = tree lower_bound = middle + 1 else: upper_bound = middle - 1 return best_tree >>>>>>> UPDATED ``` --- aider/repomap.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/aider/repomap.py b/aider/repomap.py index f45053ceb..4c83e898c 100644 --- a/aider/repomap.py +++ b/aider/repomap.py @@ -395,12 +395,22 @@ class RepoMap: ranked_tags = self.get_ranked_tags(fnames) 
num_tags = len(ranked_tags) - for i in range(num_tags, 0, -1): - tree = to_tree(ranked_tags[:i]) + lower_bound = 0 + upper_bound = num_tags + best_tree = None + + while lower_bound <= upper_bound: + middle = (lower_bound + upper_bound) // 2 + tree = to_tree(ranked_tags[:middle]) num_tokens = self.token_count(tree) - dump(i, num_tokens) + if num_tokens < max_map_tokens: - return tree + best_tree = tree + lower_bound = middle + 1 + else: + upper_bound = middle - 1 + + return best_tree def find_py_files(directory):