Notice if the user mentions the basename of a filename; fixes to repomap context

This commit is contained in:
Paul Gauthier 2024-06-24 15:12:58 -07:00
parent 7fad06e74c
commit 965c35870c
2 changed files with 29 additions and 2 deletions

View file

@ -8,6 +8,7 @@ import sys
import threading import threading
import time import time
import traceback import traceback
from collections import defaultdict
from json.decoder import JSONDecodeError from json.decoder import JSONDecodeError
from pathlib import Path from pathlib import Path
@ -472,6 +473,21 @@ class Coder:
words = set(re.split(r"\W+", text)) words = set(re.split(r"\W+", text))
return words return words
def get_ident_filename_matches(self, idents):
    """Return the relative files whose basename equals a mentioned identifier.

    Each path from ``self.get_all_relative_files()`` is indexed by its
    lowercased basename with the final suffix removed. An identifier matches
    a file when its lowercased form equals that key. Basenames and
    identifiers shorter than five characters are ignored so that short,
    common words do not pull in unrelated files.

    Args:
        idents: iterable of identifier strings mentioned by the user.

    Returns:
        A set of the matching relative file names (empty if none match).
    """
    min_len = 5

    # Index: lowercased suffix-less basename -> every path sharing it.
    paths_by_stem = defaultdict(set)
    for rel_path in self.get_all_relative_files():
        stem = Path(rel_path).with_suffix("").name.lower()
        if len(stem) < min_len:
            continue
        paths_by_stem[stem].add(rel_path)

    # The length filter is applied to the identifier as written, before
    # lowercasing; only the lookup key is case-normalised.
    wanted = {ident.lower() for ident in idents if len(ident) >= min_len}

    found = set()
    for key in wanted:
        found |= paths_by_stem.get(key, set())
    return found
def get_repo_map(self): def get_repo_map(self):
if not self.repo_map: if not self.repo_map:
return return
@ -480,6 +496,8 @@ class Coder:
mentioned_fnames = self.get_file_mentions(cur_msg_text) mentioned_fnames = self.get_file_mentions(cur_msg_text)
mentioned_idents = self.get_ident_mentions(cur_msg_text) mentioned_idents = self.get_ident_mentions(cur_msg_text)
mentioned_fnames.update(self.get_ident_filename_matches(mentioned_idents))
other_files = set(self.get_all_abs_files()) - set(self.abs_fnames) other_files = set(self.get_all_abs_files()) - set(self.abs_fnames)
repo_content = self.repo_map.get_repo_map( repo_content = self.repo_map.get_repo_map(
self.abs_fnames, self.abs_fnames,

View file

@ -1,4 +1,5 @@
import colorsys import colorsys
import math
import os import os
import random import random
import sys import sys
@ -242,7 +243,7 @@ class RepoMap:
# Default personalization for unspecified files is 1/num_nodes # Default personalization for unspecified files is 1/num_nodes
# https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank # https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
personalize = 10 / len(fnames) personalize = 100 / len(fnames)
if self.cache_missing: if self.cache_missing:
fnames = tqdm(fnames) fnames = tqdm(fnames)
@ -268,7 +269,7 @@ class RepoMap:
personalization[rel_fname] = personalize personalization[rel_fname] = personalize
chat_rel_fnames.add(rel_fname) chat_rel_fnames.add(rel_fname)
if fname in mentioned_fnames: if rel_fname in mentioned_fnames:
personalization[rel_fname] = personalize personalization[rel_fname] = personalize
tags = list(self.get_tags(fname, rel_fname)) tags = list(self.get_tags(fname, rel_fname))
@ -300,12 +301,20 @@ class RepoMap:
definers = defines[ident] definers = defines[ident]
if ident in mentioned_idents: if ident in mentioned_idents:
mul = 10 mul = 10
elif ident.startswith("_"):
mul = 0.1
else: else:
mul = 1 mul = 1
for referencer, num_refs in Counter(references[ident]).items(): for referencer, num_refs in Counter(references[ident]).items():
for definer in definers: for definer in definers:
# dump(referencer, definer, num_refs, mul)
# if referencer == definer: # if referencer == definer:
# continue # continue
# scale down so high freq (low value) mentions don't dominate
num_refs = math.sqrt(num_refs)
G.add_edge(referencer, definer, weight=mul * num_refs, ident=ident) G.add_edge(referencer, definer, weight=mul * num_refs, ident=ident)
if not references: if not references: