This commit is contained in:
Paul Gauthier 2024-05-14 11:12:36 -07:00
parent 851e98a398
commit 73e6949287
2 changed files with 13 additions and 21 deletions

View file

@@ -762,7 +762,7 @@ class Coder:
) )
] ]
def check_for_file_mentions(self, content): def get_file_mentions(self, content):
words = set(word for word in content.split()) words = set(word for word in content.split())
# drop sentence punctuation from the end # drop sentence punctuation from the end
@@ -781,14 +781,22 @@ class Coder:
mentioned_rel_fnames.add(str(rel_fname)) mentioned_rel_fnames.add(str(rel_fname))
fname = os.path.basename(rel_fname) fname = os.path.basename(rel_fname)
if fname not in fname_to_rel_fnames:
fname_to_rel_fnames[fname] = [] # Don't add basenames that could be plain words like "run" or "make"
fname_to_rel_fnames[fname].append(rel_fname) if "/" in fname or "." in fname:
if fname not in fname_to_rel_fnames:
fname_to_rel_fnames[fname] = []
fname_to_rel_fnames[fname].append(rel_fname)
for fname, rel_fnames in fname_to_rel_fnames.items(): for fname, rel_fnames in fname_to_rel_fnames.items():
if len(rel_fnames) == 1 and fname in words: if len(rel_fnames) == 1 and fname in words:
mentioned_rel_fnames.add(rel_fnames[0]) mentioned_rel_fnames.add(rel_fnames[0])
return mentioned_rel_fnames
def check_for_file_mentions(self, content):
mentioned_rel_fnames = self.get_file_mentions(content)
if not mentioned_rel_fnames: if not mentioned_rel_fnames:
return return

View file

@@ -225,7 +225,7 @@ class RepoMap:
fnames = sorted(fnames) fnames = sorted(fnames)
if self.cache_missing: if self.cache_missing or True:
fnames = tqdm(fnames) fnames = tqdm(fnames)
self.cache_missing = False self.cache_missing = False
@@ -338,25 +338,16 @@ class RepoMap:
if not max_map_tokens: if not max_map_tokens:
max_map_tokens = self.max_map_tokens max_map_tokens = self.max_map_tokens
dump(max_map_tokens, self.max_map_tokens)
ranked_tags = self.get_ranked_tags(chat_fnames, other_fnames) ranked_tags = self.get_ranked_tags(chat_fnames, other_fnames)
# dump(ranked_tags)
num_tags = len(ranked_tags) num_tags = len(ranked_tags)
dump(num_tags, max_map_tokens)
lower_bound = 0 lower_bound = 0
upper_bound = num_tags upper_bound = num_tags
best_tree = None best_tree = None
best_tree_tokens = 0 best_tree_tokens = 0
chat_rel_fnames = [self.get_rel_fname(fname) for fname in chat_fnames] chat_rel_fnames = [self.get_rel_fname(fname) for fname in chat_fnames]
print("#" * 80)
# Guess a small starting number to help with giant repos # Guess a small starting number to help with giant repos
middle = min(max_map_tokens // 25, num_tags) middle = min(max_map_tokens // 25, num_tags)
@@ -366,14 +357,7 @@ class RepoMap:
tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames) tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
num_tokens = self.token_count(tree) num_tokens = self.token_count(tree)
dump(lower_bound, middle, upper_bound)
dump(num_tokens)
dump(num_tokens / middle)
# dump(len(tree))
if num_tokens < max_map_tokens and num_tokens > best_tree_tokens: if num_tokens < max_map_tokens and num_tokens > best_tree_tokens:
print(f"best_tree: {num_tokens} tokens, {middle} middle")
best_tree = tree best_tree = tree
best_tree_tokens = num_tokens best_tree_tokens = num_tokens