aider: Populate self.words by tokenizing each file's content with a Pygments lexer and adding all Token.Name tokens.

# Context: USER: in FileContentCompleter instead of populating self.words by splitting words with a regex, run the pygments lexer on each file and add all the Token.Name tokens to words. ask the lexer to guess the language type. ASSISTANT: To use Pygments lexer to populate `self.words`, you need to first install the Pygments package by adding it to your `requirements.txt` file. Then, you can use the `guess_lexer_for_filename` function to guess the lexer based on the file name and use the lexer to tokenize the file content. Finally, you can filter the tokens by `Token.Name` and add them to `self.words`. Here are the changes you need to make: ```python requirements.txt <<<<<<< ORIGINAL ======= pygments >>>>>>> UPDATED ``` ```python aider/getinput.py <<<<<<< ORIGINAL import os import re ======= import os from pygments import highlight from pygments.lexers import guess_lexer_for_filename from pygments.token import Token >>>>>>> UPDATED ``` ```python aider/getinput.py <<<<<<< ORIGINAL def __init__(self, fnames, commands): self.commands = commands self.words = set() for fname in fnames: with open(fname, "r") as f: content = f.read() self.words.update(re.split(r"\W+", content)) ======= def __init__(self, fnames, commands): self.commands = commands self.words = set() for fname in fnames: with open(fname, "r") as f: content = f.read() lexer = guess_lexer_for_filename(fname, content) tokens = list(lexer.get_tokens(content)) self.words.update( token[1] for token in tokens if token[0] in Token.Name ) >>>>>>> UPDATED ``` These changes will use the Pygments lexer to populate `self.words` with the `Token.Name` tokens.
2025-05-28 16:25:00 +00:00 · 2023-05-10 15:26:40 -07:00 · 2023-05-10 15:26:40 -07:00 · 9bd635a7a4
commit 9bd635a7a4
parent 7cc2b59a25
1 changed file with 8 additions and 4 deletions
--- a/aider/getinput.py
+++ b/aider/getinput.py
@@ -1,6 +1,7 @@
 import os
-import re
-
+from pygments import highlight
+from pygments.lexers import guess_lexer_for_filename
+from pygments.token import Token
 from prompt_toolkit.styles import Style

 from prompt_toolkit import prompt
@@ -21,8 +22,11 @@ class FileContentCompleter(Completer):
        for fname in fnames:
            with open(fname, "r") as f:
                content = f.read()
-            self.words.update(re.split(r"\W+", content))
-
+            lexer = guess_lexer_for_filename(fname, content)
+            tokens = list(lexer.get_tokens(content))
+            self.words.update(
+                token[1] for token in tokens if token[0] in Token.Name
+            )
    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
        words = text.split()