Merge branch 'main' into ts-pack

Paul Gauthier 2024-11-27 08:54:03 -08:00
commit ee837889db
221 changed files with 19622 additions and 3306 deletions

aider/repomap.py

@@ -2,6 +2,8 @@ import colorsys
 import math
 import os
 import random
+import shutil
+import sqlite3
 import sys
 import time
 import warnings
@@ -13,10 +15,10 @@ from diskcache import Cache
 from grep_ast import TreeContext, filename_to_lang
 from pygments.lexers import guess_lexer_for_filename
 from pygments.token import Token
-from pygments.util import ClassNotFound
 from tqdm import tqdm
 
 from aider.dump import dump
+from aider.special import filter_important_files
 from aider.utils import Spinner
 
 # tree_sitter is throwing a FutureWarning
@@ -26,6 +28,9 @@ from tree_sitter_language_pack import get_language, get_parser  # noqa: E402
 Tag = namedtuple("Tag", "rel_fname fname line name kind".split())
 
 
+SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)
+
+
 class RepoMap:
     CACHE_VERSION = 3
     TAGS_CACHE_DIR = f".aider.tags.cache.v{CACHE_VERSION}"
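
Note: diskcache's Cache is backed by a SQLite file on disk, so a corrupted or locked cache directory surfaces as sqlite3 exceptions (or OSError), which is what the new SQLITE_ERRORS tuple collects. A minimal sketch of the fallback idea, using an assumed path rather than aider's real cache dir:

    import sqlite3
    from diskcache import Cache

    SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)

    try:
        cache = Cache("/tmp/demo.tags.cache")  # assumed path, not aider's
        cache["k"] = "v"
    except SQLITE_ERRORS as err:
        print(f"cache unusable ({err}); degrading to an in-memory dict")
        cache = dict()
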
@@ -155,17 +160,59 @@ class RepoMap:
         return repo_content
 
     def get_rel_fname(self, fname):
-        return os.path.relpath(fname, self.root)
+        try:
+            return os.path.relpath(fname, self.root)
+        except ValueError:
+            # Issue #1288: ValueError: path is on mount 'C:', start on mount 'D:'
+            # Just return the full fname.
+            return fname
 
     def split_path(self, path):
         path = os.path.relpath(path, self.root)
         return [path + ":"]
 
+    def tags_cache_error(self, original_error=None):
+        """Handle SQLite errors by trying to recreate cache, falling back to dict if needed"""
+        if self.verbose and original_error:
+            self.io.tool_warning(f"Tags cache error: {str(original_error)}")
+        if isinstance(getattr(self, "TAGS_CACHE", None), dict):
+            return
+        path = Path(self.root) / self.TAGS_CACHE_DIR
+
+        # Try to recreate the cache
+        try:
+            # Delete existing cache dir
+            if path.exists():
+                shutil.rmtree(path)
+            # Try to create new cache
+            new_cache = Cache(path)
+            # Test that it works
+            test_key = "test"
+            new_cache[test_key] = "test"
+            _ = new_cache[test_key]
+            del new_cache[test_key]
+            # If we got here, the new cache works
+            self.TAGS_CACHE = new_cache
+            return
+        except SQLITE_ERRORS as e:
+            # If anything goes wrong, warn and fall back to dict
+            self.io.tool_warning(
+                f"Unable to use tags cache at {path}, falling back to memory cache"
+            )
+            if self.verbose:
+                self.io.tool_warning(f"Cache recreation error: {str(e)}")
+        self.TAGS_CACHE = dict()
+
     def load_tags_cache(self):
         path = Path(self.root) / self.TAGS_CACHE_DIR
         if not path.exists():
             self.cache_missing = True
-        self.TAGS_CACHE = Cache(path)
+
+        try:
+            self.TAGS_CACHE = Cache(path)
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
 
     def save_tags_cache(self):
         pass
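
Note: the recreate-then-fallback logic in tags_cache_error, restated as a standalone sketch (names here are illustrative, not aider's API): delete the cache directory, rebuild it, smoke-test a write/read/delete, and fall back to a plain dict if any step raises.

    import shutil
    import sqlite3
    from pathlib import Path
    from diskcache import Cache

    SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)

    def recreate_or_dict(path: Path):
        try:
            if path.exists():
                shutil.rmtree(path)   # drop the corrupt cache dir
            cache = Cache(path)       # rebuild it
            cache["probe"] = "probe"  # smoke-test write...
            _ = cache["probe"]        # ...read...
            del cache["probe"]        # ...and delete
            return cache
        except SQLITE_ERRORS:
            return dict()             # degrade to memory-only
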
@@ -174,7 +221,7 @@ class RepoMap:
         try:
             return os.path.getmtime(fname)
         except FileNotFoundError:
-            self.io.tool_error(f"File not found error: {fname}")
+            self.io.tool_warning(f"File not found error: {fname}")
 
     def get_tags(self, fname, rel_fname):
         # Check if the file is in the cache and if the modification time has not changed
@@ -183,15 +230,30 @@ class RepoMap:
             return []
 
         cache_key = fname
-        if cache_key in self.TAGS_CACHE and self.TAGS_CACHE[cache_key]["mtime"] == file_mtime:
-            return self.TAGS_CACHE[cache_key]["data"]
+        try:
+            val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
+            val = self.TAGS_CACHE.get(cache_key)
+
+        if val is not None and val.get("mtime") == file_mtime:
+            try:
+                return self.TAGS_CACHE[cache_key]["data"]
+            except SQLITE_ERRORS as e:
+                self.tags_cache_error(e)
+                return self.TAGS_CACHE[cache_key]["data"]
 
         # miss!
         data = list(self.get_tags_raw(fname, rel_fname))
 
         # Update the cache
-        self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
-        self.save_tags_cache()
+        try:
+            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
+            self.save_tags_cache()
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
+            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
 
         return data
 
     def get_tags_raw(self, fname, rel_fname):
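
Note: get_tags is an mtime-keyed memoization, so a cached entry is only trusted while os.path.getmtime is unchanged. Stripped of the SQLite error handling, the shape is roughly this sketch:

    import os

    tags_cache = {}  # stand-in for self.TAGS_CACHE

    def cached_tags(fname, compute):
        mtime = os.path.getmtime(fname)
        val = tags_cache.get(fname)
        if val is not None and val.get("mtime") == mtime:
            return val["data"]  # hit: file unchanged since it was cached
        data = compute(fname)   # miss: re-parse the file
        tags_cache[fname] = {"mtime": mtime, "data": data}
        return data
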
@@ -199,8 +261,12 @@ class RepoMap:
         if not lang:
             return
 
-        language = get_language(lang)
-        parser = get_parser(lang)
+        try:
+            language = get_language(lang)
+            parser = get_parser(lang)
+        except Exception as err:
+            print(f"Skipping file {fname}: {err}")
+            return
 
         query_scm = get_scm_fname(lang)
         if not query_scm.exists():
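
Note: get_language and get_parser come from tree_sitter_language_pack and can raise for unsupported or misbuilt grammars, hence the broad except above. A hedged usage sketch, where "python" is just an example language id:

    from tree_sitter_language_pack import get_language, get_parser

    try:
        language = get_language("python")
        parser = get_parser("python")
        tree = parser.parse(b"def hello():\n    pass\n")
        print(tree.root_node.type)  # "module"
    except Exception as err:
        print(f"Skipping: {err}")
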
@@ -253,7 +319,8 @@ class RepoMap:
         try:
             lexer = guess_lexer_for_filename(fname, code)
-        except ClassNotFound:
+        except Exception:  # On Windows, bad ref to time.clock which is deprecated?
+            # self.io.tool_error(f"Error lexing {fname}")
             return
 
         tokens = list(lexer.get_tokens(code))
@@ -288,7 +355,13 @@ class RepoMap:
         # https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
         personalize = 100 / len(fnames)
 
-        if len(fnames) - len(self.TAGS_CACHE) > 100:
+        try:
+            cache_size = len(self.TAGS_CACHE)
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
+            cache_size = len(self.TAGS_CACHE)
+
+        if len(fnames) - cache_size > 100:
             self.io.tool_output(
                 "Initial repo scan can be slow in larger repos, but only happens once."
             )
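
Note: even len(self.TAGS_CACHE) can touch the SQLite backend, so it gets the same try-repair-retry treatment as get_tags. The pattern in isolation, as a sketch:

    import sqlite3

    SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)

    def retry_once(op, repair):
        try:
            return op()
        except SQLITE_ERRORS as e:
            repair(e)    # e.g. rebuild the cache, or swap in a dict
            return op()  # a second failure propagates to the caller
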
@@ -298,19 +371,23 @@ class RepoMap:
         showing_bar = False
 
         for fname in fnames:
+            if self.verbose:
+                self.io.tool_output(f"Processing {fname}")
             if progress and not showing_bar:
                 progress()
 
-            if not Path(fname).is_file():
-                if fname not in self.warned_files:
-                    if Path(fname).exists():
-                        self.io.tool_error(
-                            f"Repo-map can't include {fname}, it is not a normal file"
-                        )
-                    else:
-                        self.io.tool_error(f"Repo-map can't include {fname}, it no longer exists")
+            try:
+                file_ok = Path(fname).is_file()
+            except OSError:
+                file_ok = False
 
-                self.warned_files.add(fname)
+            if not file_ok:
+                if fname not in self.warned_files:
+                    self.io.tool_warning(f"Repo-map can't include {fname}")
+                    self.io.tool_output(
+                        "Has it been deleted from the file system but not from git?"
+                    )
+                    self.warned_files.add(fname)
                 continue
 
             # dump(fname)
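
Note: the Path(fname).is_file() probe is wrapped because the underlying os.stat can raise OSError on some platforms (e.g. a malformed Windows path) instead of returning False. A tiny sketch of the guard:

    from pathlib import Path

    def file_ok(fname):
        try:
            return Path(fname).is_file()
        except OSError:  # e.g. an invalid name the OS refuses to stat
            return False
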
@@ -382,7 +459,11 @@ class RepoMap:
         try:
             ranked = nx.pagerank(G, weight="weight", **pers_args)
         except ZeroDivisionError:
-            return []
+            # Issue #1536
+            try:
+                ranked = nx.pagerank(G, weight="weight")
+            except ZeroDivisionError:
+                return []
 
         # distribute the rank from each source node, across all of its out edges
         ranked_definitions = defaultdict(float)
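
Note: per Issue #1536, personalized PageRank can raise ZeroDivisionError on some graphs, so the code now retries once without the personalization arguments before giving up. Roughly, as a sketch:

    import networkx as nx

    def ranked_or_empty(G, pers_args):
        try:
            return nx.pagerank(G, weight="weight", **pers_args)
        except ZeroDivisionError:
            try:
                return nx.pagerank(G, weight="weight")  # drop personalization
            except ZeroDivisionError:
                return {}  # caller treats this as "no ranking available"
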
@@ -399,7 +480,9 @@ class RepoMap:
                 ranked_definitions[(dst, ident)] += data["rank"]
 
         ranked_tags = []
-        ranked_definitions = sorted(ranked_definitions.items(), reverse=True, key=lambda x: x[1])
+        ranked_definitions = sorted(
+            ranked_definitions.items(), reverse=True, key=lambda x: (x[1], x[0])
+        )
 
         # dump(ranked_definitions)
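
Note: changing the sort key from x[1] to (x[1], x[0]) makes tie-breaking deterministic: definitions with equal rank now order by their (fname, ident) key instead of by dict iteration order. For example:

    ranks = {("b.py", "foo"): 0.5, ("a.py", "bar"): 0.5}
    print(sorted(ranks.items(), reverse=True, key=lambda x: (x[1], x[0])))
    # ties on rank 0.5 fall back to comparing the (fname, ident) tuples
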
@@ -435,12 +518,20 @@ class RepoMap:
         force_refresh=False,
     ):
         # Create a cache key
-        cache_key = (
+        cache_key = [
             tuple(sorted(chat_fnames)) if chat_fnames else None,
             tuple(sorted(other_fnames)) if other_fnames else None,
             max_map_tokens,
-        )
+        ]
+
+        if self.refresh == "auto":
+            cache_key += [
+                tuple(sorted(mentioned_fnames)) if mentioned_fnames else None,
+                tuple(sorted(mentioned_idents)) if mentioned_idents else None,
+            ]
+        cache_key = tuple(cache_key)
 
+        use_cache = False
         if not force_refresh:
             if self.refresh == "manual" and self.last_map:
                 return self.last_map
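
Note: building the key as a list and converting it with tuple() keeps it hashable for use as a dict key, while letting refresh == "auto" splice in the mentioned files and identifiers first. An illustrative example with invented values:

    refresh = "auto"
    cache_key = [("a.py",), ("b.py", "c.py"), 1024]    # invented values
    if refresh == "auto":
        cache_key += [("a.py",), ("frobnicate",)]      # mentioned names
    cache_key = tuple(cache_key)                       # now hashable
    map_cache = {cache_key: "rendered repo map"}       # a list key would raise TypeError
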
@@ -497,6 +588,14 @@ class RepoMap:
                 progress=spin.step,
             )
 
+        other_rel_fnames = sorted(set(self.get_rel_fname(fname) for fname in other_fnames))
+        special_fnames = filter_important_files(other_rel_fnames)
+        ranked_tags_fnames = set(tag[0] for tag in ranked_tags)
+        special_fnames = [fn for fn in special_fnames if fn not in ranked_tags_fnames]
+        special_fnames = [(fn,) for fn in special_fnames]
+
+        ranked_tags = special_fnames + ranked_tags
+
         spin.step()
 
         num_tags = len(ranked_tags)
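
Note: the new block prepends "special" files, selected by filter_important_files from aider.special, that PageRank did not already surface; they are wrapped as bare (fname,) tuples so the map shows the filename with no particular tag. A sketch with invented values:

    ranked_tags = [("src/app.py", "main")]             # invented ranked output
    special = ["README.md", "pyproject.toml"]          # invented filter result
    already = set(tag[0] for tag in ranked_tags)
    ranked_tags = [(fn,) for fn in special if fn not in already] + ranked_tags
    print(ranked_tags)  # [('README.md',), ('pyproject.toml',), ('src/app.py', 'main')]
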