def get_ident_filename_matches(self, idents):
    """Return the repo files whose base name equals one of ``idents``.

    The comparison is case-insensitive and uses the final path component
    with its last suffix removed (``pkg/RepoMap.py`` -> ``repomap``).
    Base names and identifiers shorter than 5 characters are ignored.

    Args:
        idents: Iterable of identifier strings to look up.

    Returns:
        A set of the paths, exactly as yielded by
        ``self.get_all_relative_files()``, whose base name matches one of
        the identifiers. Empty when nothing matches.
    """
    min_len = 5

    fnames_by_base = defaultdict(set)
    for fname in self.get_all_relative_files():
        # ``Path.stem`` yields the same value as ``with_suffix("").name`` for
        # ordinary paths, but does not raise ValueError for entries with an
        # empty name such as "" or "."; those just produce an empty base.
        base = Path(fname).stem.lower()
        if len(base) >= min_len:
            fnames_by_base[base].add(fname)

    matches = set()
    for ident in idents:
        if len(ident) < min_len:
            continue
        # ``.get`` avoids inserting an empty set for every unmatched ident.
        matches.update(fnames_by_base.get(ident.lower(), ()))

    return matches
-300,12 +301,20 @@ class RepoMap: definers = defines[ident] if ident in mentioned_idents: mul = 10 + elif ident.startswith("_"): + mul = 0.1 else: mul = 1 + for referencer, num_refs in Counter(references[ident]).items(): for definer in definers: + # dump(referencer, definer, num_refs, mul) # if referencer == definer: # continue + + # scale down so high freq (low value) mentions don't dominate + num_refs = math.sqrt(num_refs) + G.add_edge(referencer, definer, weight=mul * num_refs, ident=ident) if not references: