Notice if the user mentions the basename of a filename; fixes to repomap context

This commit is contained in:
Paul Gauthier 2024-06-24 15:12:58 -07:00
parent 7fad06e74c
commit 965c35870c
2 changed files with 29 additions and 2 deletions

View file

@ -8,6 +8,7 @@ import sys
import threading
import time
import traceback
from collections import defaultdict
from json.decoder import JSONDecodeError
from pathlib import Path
@ -472,6 +473,21 @@ class Coder:
words = set(re.split(r"\W+", text))
return words
def get_ident_filename_matches(self, idents):
    """Return the repo files whose basename matches one of *idents*.

    A file's basename is its final path component with the last suffix
    removed, compared case-insensitively. Basenames and identifiers
    shorter than five characters are ignored on both sides.

    Args:
        idents: iterable of identifier strings.

    Returns:
        A set of the relative file names (as yielded by
        ``self.get_all_relative_files()``) whose basename equals one of
        the identifiers.
    """
    min_len = 5

    # Index every sufficiently long basename -> the files that carry it.
    files_by_stem = defaultdict(set)
    for rel_path in self.get_all_relative_files():
        stem = Path(rel_path).with_suffix("").name.lower()
        if len(stem) < min_len:
            continue
        files_by_stem[stem].add(rel_path)

    # Union the file sets of every identifier long enough to be considered.
    return set().union(
        *(files_by_stem[ident.lower()] for ident in idents if len(ident) >= min_len)
    )
def get_repo_map(self):
if not self.repo_map:
return
@ -480,6 +496,8 @@ class Coder:
mentioned_fnames = self.get_file_mentions(cur_msg_text)
mentioned_idents = self.get_ident_mentions(cur_msg_text)
mentioned_fnames.update(self.get_ident_filename_matches(mentioned_idents))
other_files = set(self.get_all_abs_files()) - set(self.abs_fnames)
repo_content = self.repo_map.get_repo_map(
self.abs_fnames,

View file

@ -1,4 +1,5 @@
import colorsys
import math
import os
import random
import sys
@ -242,7 +243,7 @@ class RepoMap:
# Default personalization for unspecified files is 1/num_nodes
# https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
personalize = 10 / len(fnames)
personalize = 100 / len(fnames)
if self.cache_missing:
fnames = tqdm(fnames)
@ -268,7 +269,7 @@ class RepoMap:
personalization[rel_fname] = personalize
chat_rel_fnames.add(rel_fname)
if fname in mentioned_fnames:
if rel_fname in mentioned_fnames:
personalization[rel_fname] = personalize
tags = list(self.get_tags(fname, rel_fname))
@ -300,12 +301,20 @@ class RepoMap:
definers = defines[ident]
if ident in mentioned_idents:
mul = 10
elif ident.startswith("_"):
mul = 0.1
else:
mul = 1
for referencer, num_refs in Counter(references[ident]).items():
for definer in definers:
# dump(referencer, definer, num_refs, mul)
# if referencer == definer:
# continue
# scale down so high freq (low value) mentions don't dominate
num_refs = math.sqrt(num_refs)
G.add_edge(referencer, definer, weight=mul * num_refs, ident=ident)
if not references: