Merge branch 'main' into ts-pack

Paul Gauthier 2024-11-27 08:54:03 -08:00
commit ee837889db
221 changed files with 19622 additions and 3306 deletions

aider/repomap.py

@@ -2,6 +2,8 @@ import colorsys
 import math
 import os
 import random
+import shutil
+import sqlite3
 import sys
 import time
 import warnings
@@ -13,10 +15,10 @@ from diskcache import Cache
 from grep_ast import TreeContext, filename_to_lang
 from pygments.lexers import guess_lexer_for_filename
 from pygments.token import Token
-from pygments.util import ClassNotFound
 from tqdm import tqdm
 
 from aider.dump import dump
+from aider.special import filter_important_files
 from aider.utils import Spinner
 
 # tree_sitter is throwing a FutureWarning
@@ -26,6 +28,9 @@ from tree_sitter_language_pack import get_language, get_parser  # noqa: E402
 Tag = namedtuple("Tag", "rel_fname fname line name kind".split())
 
 
+SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)
+
+
 class RepoMap:
     CACHE_VERSION = 3
     TAGS_CACHE_DIR = f".aider.tags.cache.v{CACHE_VERSION}"
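
Note: diskcache's Cache is backed by a SQLite file on disk, so a corrupted or locked cache directory surfaces as sqlite3 exceptions (or OSError), which is what the new SQLITE_ERRORS tuple collects. A minimal sketch of the fallback idea, using an assumed path rather than aider's real cache dir:

    import sqlite3
    from diskcache import Cache

    SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)

    try:
        cache = Cache("/tmp/demo.tags.cache")  # assumed path, not aider's
        cache["k"] = "v"
    except SQLITE_ERRORS as err:
        print(f"cache unusable ({err}); degrading to an in-memory dict")
        cache = dict()
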
@@ -155,17 +160,59 @@ class RepoMap:
         return repo_content
 
     def get_rel_fname(self, fname):
-        return os.path.relpath(fname, self.root)
+        try:
+            return os.path.relpath(fname, self.root)
+        except ValueError:
+            # Issue #1288: ValueError: path is on mount 'C:', start on mount 'D:'
+            # Just return the full fname.
+            return fname
 
     def split_path(self, path):
         path = os.path.relpath(path, self.root)
         return [path + ":"]
 
+    def tags_cache_error(self, original_error=None):
+        """Handle SQLite errors by trying to recreate cache, falling back to dict if needed"""
+        if self.verbose and original_error:
+            self.io.tool_warning(f"Tags cache error: {str(original_error)}")
+        if isinstance(getattr(self, "TAGS_CACHE", None), dict):
+            return
+        path = Path(self.root) / self.TAGS_CACHE_DIR
+
+        # Try to recreate the cache
+        try:
+            # Delete existing cache dir
+            if path.exists():
+                shutil.rmtree(path)
+            # Try to create new cache
+            new_cache = Cache(path)
+            # Test that it works
+            test_key = "test"
+            new_cache[test_key] = "test"
+            _ = new_cache[test_key]
+            del new_cache[test_key]
+            # If we got here, the new cache works
+            self.TAGS_CACHE = new_cache
+            return
+        except SQLITE_ERRORS as e:
+            # If anything goes wrong, warn and fall back to dict
+            self.io.tool_warning(
+                f"Unable to use tags cache at {path}, falling back to memory cache"
+            )
+            if self.verbose:
+                self.io.tool_warning(f"Cache recreation error: {str(e)}")
+        self.TAGS_CACHE = dict()
+
     def load_tags_cache(self):
         path = Path(self.root) / self.TAGS_CACHE_DIR
         if not path.exists():
             self.cache_missing = True
-        self.TAGS_CACHE = Cache(path)
+
+        try:
+            self.TAGS_CACHE = Cache(path)
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
 
     def save_tags_cache(self):
         pass
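
Note: the recreate-then-fallback logic in tags_cache_error, restated as a standalone sketch (names here are illustrative, not aider's API): delete the cache directory, rebuild it, smoke-test a write/read/delete, and fall back to a plain dict if any step raises.

    import shutil
    import sqlite3
    from pathlib import Path
    from diskcache import Cache

    SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)

    def recreate_or_dict(path: Path):
        try:
            if path.exists():
                shutil.rmtree(path)   # drop the corrupt cache dir
            cache = Cache(path)       # rebuild it
            cache["probe"] = "probe"  # smoke-test write...
            _ = cache["probe"]        # ...read...
            del cache["probe"]        # ...and delete
            return cache
        except SQLITE_ERRORS:
            return dict()             # degrade to memory-only
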
@@ -174,7 +221,7 @@ class RepoMap:
         try:
             return os.path.getmtime(fname)
         except FileNotFoundError:
-            self.io.tool_error(f"File not found error: {fname}")
+            self.io.tool_warning(f"File not found error: {fname}")
 
     def get_tags(self, fname, rel_fname):
         # Check if the file is in the cache and if the modification time has not changed
@@ -183,15 +230,30 @@ class RepoMap:
             return []
 
         cache_key = fname
-        if cache_key in self.TAGS_CACHE and self.TAGS_CACHE[cache_key]["mtime"] == file_mtime:
-            return self.TAGS_CACHE[cache_key]["data"]
+        try:
+            val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
+            val = self.TAGS_CACHE.get(cache_key)
+
+        if val is not None and val.get("mtime") == file_mtime:
+            try:
+                return self.TAGS_CACHE[cache_key]["data"]
+            except SQLITE_ERRORS as e:
+                self.tags_cache_error(e)
+                return self.TAGS_CACHE[cache_key]["data"]
 
         # miss!
         data = list(self.get_tags_raw(fname, rel_fname))
 
         # Update the cache
-        self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
-        self.save_tags_cache()
+        try:
+            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
+            self.save_tags_cache()
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
+            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
 
         return data
 
     def get_tags_raw(self, fname, rel_fname):
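
Note: get_tags is an mtime-keyed memoization, so a cached entry is only trusted while os.path.getmtime is unchanged. Stripped of the SQLite error handling, the shape is roughly this sketch:

    import os

    tags_cache = {}  # stand-in for self.TAGS_CACHE

    def cached_tags(fname, compute):
        mtime = os.path.getmtime(fname)
        val = tags_cache.get(fname)
        if val is not None and val.get("mtime") == mtime:
            return val["data"]  # hit: file unchanged since it was cached
        data = compute(fname)   # miss: re-parse the file
        tags_cache[fname] = {"mtime": mtime, "data": data}
        return data
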
@@ -199,8 +261,12 @@ class RepoMap:
         if not lang:
             return
 
-        language = get_language(lang)
-        parser = get_parser(lang)
+        try:
+            language = get_language(lang)
+            parser = get_parser(lang)
+        except Exception as err:
+            print(f"Skipping file {fname}: {err}")
+            return
 
         query_scm = get_scm_fname(lang)
         if not query_scm.exists():
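
Note: get_language and get_parser come from tree_sitter_language_pack and can raise for unsupported or misbuilt grammars, hence the broad except above. A hedged usage sketch, where "python" is just an example language id:

    from tree_sitter_language_pack import get_language, get_parser

    try:
        language = get_language("python")
        parser = get_parser("python")
        tree = parser.parse(b"def hello():\n    pass\n")
        print(tree.root_node.type)  # "module"
    except Exception as err:
        print(f"Skipping: {err}")
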
@@ -253,7 +319,8 @@ class RepoMap:
         try:
             lexer = guess_lexer_for_filename(fname, code)
-        except ClassNotFound:
+        except Exception:  # On Windows, bad ref to time.clock which is deprecated?
+            # self.io.tool_error(f"Error lexing {fname}")
             return
 
         tokens = list(lexer.get_tokens(code))
@@ -288,7 +355,13 @@ class RepoMap:
         # https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
         personalize = 100 / len(fnames)
 
-        if len(fnames) - len(self.TAGS_CACHE) > 100:
+        try:
+            cache_size = len(self.TAGS_CACHE)
+        except SQLITE_ERRORS as e:
+            self.tags_cache_error(e)
+            cache_size = len(self.TAGS_CACHE)
+
+        if len(fnames) - cache_size > 100:
             self.io.tool_output(
                 "Initial repo scan can be slow in larger repos, but only happens once."
             )
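
Note: even len(self.TAGS_CACHE) can touch the SQLite backend, so it gets the same try-repair-retry treatment as get_tags. The pattern in isolation, as a sketch:

    import sqlite3

    SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)

    def retry_once(op, repair):
        try:
            return op()
        except SQLITE_ERRORS as e:
            repair(e)    # e.g. rebuild the cache, or swap in a dict
            return op()  # a second failure propagates to the caller
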
@@ -298,19 +371,23 @@ class RepoMap:
         showing_bar = False
 
         for fname in fnames:
+            if self.verbose:
+                self.io.tool_output(f"Processing {fname}")
             if progress and not showing_bar:
                 progress()
 
-            if not Path(fname).is_file():
-                if fname not in self.warned_files:
-                    if Path(fname).exists():
-                        self.io.tool_error(
-                            f"Repo-map can't include {fname}, it is not a normal file"
-                        )
-                    else:
-                        self.io.tool_error(f"Repo-map can't include {fname}, it no longer exists")
+            try:
+                file_ok = Path(fname).is_file()
+            except OSError:
+                file_ok = False
 
-                self.warned_files.add(fname)
+            if not file_ok:
+                if fname not in self.warned_files:
+                    self.io.tool_warning(f"Repo-map can't include {fname}")
+                    self.io.tool_output(
+                        "Has it been deleted from the file system but not from git?"
+                    )
+                    self.warned_files.add(fname)
                 continue
 
             # dump(fname)
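
Note: the Path(fname).is_file() probe is wrapped because the underlying os.stat can raise OSError on some platforms (e.g. a malformed Windows path) instead of returning False. A tiny sketch of the guard:

    from pathlib import Path

    def file_ok(fname):
        try:
            return Path(fname).is_file()
        except OSError:  # e.g. an invalid name the OS refuses to stat
            return False
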
@@ -382,7 +459,11 @@ class RepoMap:
         try:
             ranked = nx.pagerank(G, weight="weight", **pers_args)
         except ZeroDivisionError:
-            return []
+            # Issue #1536
+            try:
+                ranked = nx.pagerank(G, weight="weight")
+            except ZeroDivisionError:
+                return []
 
         # distribute the rank from each source node, across all of its out edges
         ranked_definitions = defaultdict(float)
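
Note: per Issue #1536, personalized PageRank can raise ZeroDivisionError on some graphs, so the code now retries once without the personalization arguments before giving up. Roughly, as a sketch:

    import networkx as nx

    def ranked_or_empty(G, pers_args):
        try:
            return nx.pagerank(G, weight="weight", **pers_args)
        except ZeroDivisionError:
            try:
                return nx.pagerank(G, weight="weight")  # drop personalization
            except ZeroDivisionError:
                return {}  # caller treats this as "no ranking available"
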
@@ -399,7 +480,9 @@ class RepoMap:
                 ranked_definitions[(dst, ident)] += data["rank"]
 
         ranked_tags = []
-        ranked_definitions = sorted(ranked_definitions.items(), reverse=True, key=lambda x: x[1])
+        ranked_definitions = sorted(
+            ranked_definitions.items(), reverse=True, key=lambda x: (x[1], x[0])
+        )
 
         # dump(ranked_definitions)
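
Note: changing the sort key from x[1] to (x[1], x[0]) makes tie-breaking deterministic: definitions with equal rank now order by their (fname, ident) key instead of by dict iteration order. For example:

    ranks = {("b.py", "foo"): 0.5, ("a.py", "bar"): 0.5}
    print(sorted(ranks.items(), reverse=True, key=lambda x: (x[1], x[0])))
    # ties on rank 0.5 fall back to comparing the (fname, ident) tuples
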
@@ -435,12 +518,20 @@ class RepoMap:
         force_refresh=False,
     ):
         # Create a cache key
-        cache_key = (
+        cache_key = [
             tuple(sorted(chat_fnames)) if chat_fnames else None,
             tuple(sorted(other_fnames)) if other_fnames else None,
             max_map_tokens,
-        )
+        ]
+
+        if self.refresh == "auto":
+            cache_key += [
+                tuple(sorted(mentioned_fnames)) if mentioned_fnames else None,
+                tuple(sorted(mentioned_idents)) if mentioned_idents else None,
+            ]
+        cache_key = tuple(cache_key)
 
+        use_cache = False
         if not force_refresh:
             if self.refresh == "manual" and self.last_map:
                 return self.last_map
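
Note: building the key as a list and converting it with tuple() keeps it hashable for use as a dict key, while letting refresh == "auto" splice in the mentioned files and identifiers first. An illustrative example with invented values:

    refresh = "auto"
    cache_key = [("a.py",), ("b.py", "c.py"), 1024]    # invented values
    if refresh == "auto":
        cache_key += [("a.py",), ("frobnicate",)]      # mentioned names
    cache_key = tuple(cache_key)                       # now hashable
    map_cache = {cache_key: "rendered repo map"}       # a list key would raise TypeError
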
@@ -497,6 +588,14 @@ class RepoMap:
                 progress=spin.step,
             )
 
+        other_rel_fnames = sorted(set(self.get_rel_fname(fname) for fname in other_fnames))
+        special_fnames = filter_important_files(other_rel_fnames)
+        ranked_tags_fnames = set(tag[0] for tag in ranked_tags)
+        special_fnames = [fn for fn in special_fnames if fn not in ranked_tags_fnames]
+        special_fnames = [(fn,) for fn in special_fnames]
+
+        ranked_tags = special_fnames + ranked_tags
+
         spin.step()
 
         num_tags = len(ranked_tags)
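
Note: the new block prepends "special" files, selected by filter_important_files from aider.special, that PageRank did not already surface; they are wrapped as bare (fname,) tuples so the map shows the filename with no particular tag. A sketch with invented values:

    ranked_tags = [("src/app.py", "main")]             # invented ranked output
    special = ["README.md", "pyproject.toml"]          # invented filter result
    already = set(tag[0] for tag in ranked_tags)
    ranked_tags = [(fn,) for fn in special if fn not in already] + ranked_tags
    print(ranked_tags)  # [('README.md',), ('pyproject.toml',), ('src/app.py', 'main')]
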