From de0cfe4d39a847f864d98dd2a40480bea96fe820 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sat, 19 Aug 2023 19:24:42 -0700 Subject: [PATCH] refac to remove ctags --- aider/coders/base_coder.py | 13 +-- aider/repomap.py | 175 ++++--------------------------------- 2 files changed, 20 insertions(+), 168 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index a2ccde829..f780f9636 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -175,17 +175,10 @@ class Coder: self.verbose, ) - if self.repo_map.use_ctags: - self.io.tool_output(f"Repo-map: universal-ctags using {map_tokens} tokens") - elif not self.repo_map.has_ctags and map_tokens > 0: - self.io.tool_output( - f"Repo-map: basic using {map_tokens} tokens" - f" ({self.repo_map.ctags_disabled_reason})" - ) - else: - self.io.tool_output("Repo-map: disabled because map_tokens == 0") + if map_tokens > 0: + self.io.tool_output(f"Repo-map: universal-ctags using {map_tokens} tokens") else: - self.io.tool_output("Repo-map: disabled") + self.io.tool_output("Repo-map: disabled because map_tokens == 0") for fname in self.get_inchat_relative_files(): self.io.tool_output(f"Added {fname} to the chat.") diff --git a/aider/repomap.py b/aider/repomap.py index 5f9ae3a8a..fffa58ec3 100644 --- a/aider/repomap.py +++ b/aider/repomap.py @@ -1,19 +1,13 @@ import colorsys -import json import os import random -import subprocess import sys -import tempfile from collections import Counter, defaultdict from pathlib import Path import networkx as nx import tiktoken from diskcache import Cache -from pygments.lexers import guess_lexer_for_filename -from pygments.token import Token -from pygments.util import ClassNotFound from tqdm import tqdm from aider import models @@ -62,19 +56,9 @@ def fname_to_components(fname, with_colon): class RepoMap: - CACHE_VERSION = 1 - ctags_cmd = [ - "ctags", - "--fields=+S", - "--extras=-F", - "--output-format=json", - "--output-encoding=utf-8", - ] - IDENT_CACHE_DIR = f".aider.ident.cache.v{CACHE_VERSION}" + CACHE_VERSION = 2 TAGS_CACHE_DIR = f".aider.tags.cache.v{CACHE_VERSION}" - ctags_disabled_reason = "ctags not initialized" - cache_missing = False def __init__( @@ -93,26 +77,27 @@ class RepoMap: root = os.getcwd() self.root = root - self.load_ident_cache() self.load_tags_cache() self.max_map_tokens = map_tokens - self.has_ctags = self.check_for_ctags() - - if map_tokens > 0 and self.has_ctags: - self.use_ctags = True - else: - self.use_ctags = False self.tokenizer = tiktoken.encoding_for_model(main_model.name) self.repo_content_prefix = repo_content_prefix def get_repo_map(self, chat_files, other_files): - res = self.choose_files_listing(chat_files, other_files) - if not res: + if self.max_map_tokens <= 0: return - files_listing, ctags_msg = res + if not other_files: + return + + files_listing = self.get_ranked_tags_map(chat_files, other_files) + if not files_listing: + return + + num_tokens = self.token_count(files_listing) + if self.verbose: + self.io.tool_output(f"ctags map: {num_tokens/1024:.1f} k-tokens") if chat_files: other = "other " @@ -120,10 +105,7 @@ class RepoMap: other = "" if self.repo_content_prefix: - repo_content = self.repo_content_prefix.format( - other=other, - ctags_msg=ctags_msg, - ) + repo_content = self.repo_content_prefix.format(other=other) else: repo_content = "" @@ -131,30 +113,6 @@ class RepoMap: return repo_content - def choose_files_listing(self, chat_files, other_files): - if self.max_map_tokens <= 0: - return - - if not other_files: - return - - if self.use_ctags: - files_listing = self.get_ranked_tags_map(chat_files, other_files) - if files_listing: - num_tokens = self.token_count(files_listing) - if self.verbose: - self.io.tool_output(f"ctags map: {num_tokens/1024:.1f} k-tokens") - ctags_msg = " with selected ctags info" - return files_listing, ctags_msg - - files_listing = self.get_simple_files_map(other_files) - ctags_msg = "" - num_tokens = self.token_count(files_listing) - if self.verbose: - self.io.tool_output(f"simple map: {num_tokens/1024:.1f} k-tokens") - if num_tokens < self.max_map_tokens: - return files_listing, ctags_msg - def get_simple_files_map(self, other_files): fnames = [] for fname in other_files: @@ -174,66 +132,6 @@ class RepoMap: path = os.path.relpath(path, self.root) return [path + ":"] - def run_ctags(self, filename): - # Check if the file is in the cache and if the modification time has not changed - file_mtime = self.get_mtime(filename) - if file_mtime is None: - return [] - - cache_key = filename - if cache_key in self.TAGS_CACHE and self.TAGS_CACHE[cache_key]["mtime"] == file_mtime: - return self.TAGS_CACHE[cache_key]["data"] - - cmd = self.ctags_cmd + [ - f"--input-encoding={self.io.encoding}", - filename, - ] - output = subprocess.check_output(cmd, stderr=subprocess.PIPE).decode("utf-8") - output_lines = output.splitlines() - - data = [] - for line in output_lines: - try: - data.append(json.loads(line)) - except json.decoder.JSONDecodeError as err: - self.io.tool_error(f"Error parsing ctags output: {err}") - self.io.tool_error(repr(line)) - - # Update the cache - self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data} - self.save_tags_cache() - return data - - def check_for_ctags(self): - try: - executable = self.ctags_cmd[0] - cmd = [executable, "--version"] - output = subprocess.check_output(cmd, stderr=subprocess.PIPE).decode("utf-8") - output = output.lower() - - cmd = " ".join(cmd) - - if "universal ctags" not in output: - self.ctags_disabled_reason = f"{cmd} does not claim to be universal ctags" - return - if "+json" not in output: - self.ctags_disabled_reason = f"{cmd} does not list +json support" - return - - with tempfile.TemporaryDirectory() as tempdir: - hello_py = os.path.join(tempdir, "hello.py") - with open(hello_py, "w", encoding="utf-8") as f: - f.write("def hello():\n print('Hello, world!')\n") - self.run_ctags(hello_py) - except FileNotFoundError: - self.ctags_disabled_reason = f"{executable} executable not found" - return - except Exception as err: - self.ctags_disabled_reason = f"error running universal-ctags: {err}" - return - - return True - def load_tags_cache(self): path = Path(self.root) / self.TAGS_CACHE_DIR if not path.exists(): @@ -243,53 +141,12 @@ class RepoMap: def save_tags_cache(self): pass - def load_ident_cache(self): - path = Path(self.root) / self.IDENT_CACHE_DIR - if not path.exists(): - self.cache_missing = True - self.IDENT_CACHE = Cache(path) - - def save_ident_cache(self): - pass - def get_mtime(self, fname): try: return os.path.getmtime(fname) except FileNotFoundError: self.io.tool_error(f"File not found error: {fname}") - def get_name_identifiers(self, fname, uniq=True): - file_mtime = self.get_mtime(fname) - if file_mtime is None: - return set() - - cache_key = fname - if cache_key in self.IDENT_CACHE and self.IDENT_CACHE[cache_key]["mtime"] == file_mtime: - idents = self.IDENT_CACHE[cache_key]["data"] - else: - idents = self.get_name_identifiers_uncached(fname) - self.IDENT_CACHE[cache_key] = {"mtime": file_mtime, "data": idents} - self.save_ident_cache() - - if uniq: - idents = set(idents) - return idents - - def get_name_identifiers_uncached(self, fname): - content = self.io.read_text(fname) - if content is None: - return list() - - try: - lexer = guess_lexer_for_filename(fname, content) - except ClassNotFound: - return list() - - # lexer.get_tokens_unprocessed() returns (char position in file, token type, token string) - tokens = list(lexer.get_tokens_unprocessed(content)) - res = [token[2] for token in tokens if token[1] in Token.Name] - return res - def get_ranked_tags(self, chat_fnames, other_fnames): defines = defaultdict(set) references = defaultdict(list) @@ -318,7 +175,8 @@ class RepoMap: personalization[rel_fname] = 1.0 chat_rel_fnames.add(rel_fname) - data = self.run_ctags(fname) + # TODO + data = [] for tag in data: ident = tag["name"] @@ -342,7 +200,8 @@ class RepoMap: definitions[key].add(tuple(res)) # definitions[key].add((rel_fname,)) - idents = self.get_name_identifiers(fname, uniq=False) + # TODO + idents = [] for ident in idents: # dump("ref", fname, ident) references[ident].append(rel_fname)