From 9bd635a7a4d86a2bd307a769b7f6fa9548a79cce Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Wed, 10 May 2023 15:26:40 -0700 Subject: [PATCH] aider: Populate `self.words` using Pygments lexer to tokenize the file content and add all the `Token.Name` tokens to `self.words`. # Context: USER: in FileContentCompleter instead of populating self.words by splitting words with a regex, run the pygments lexer on each file and add all the Token.Name tokens to words. ask the lexer to guess the language type. ASSISTANT: To use Pygments lexer to populate `self.words`, you need to first install the Pygments package by adding it to your `requirements.txt` file. Then, you can use the `guess_lexer_for_filename` function to guess the lexer based on the file name and use the lexer to tokenize the file content. Finally, you can filter the tokens by `Token.Name` and add them to `self.words`. Here are the changes you need to make: ```python requirements.txt <<<<<<< ORIGINAL ======= pygments >>>>>>> UPDATED ``` ```python aider/getinput.py <<<<<<< ORIGINAL import os import re ======= import os from pygments import highlight from pygments.lexers import guess_lexer_for_filename from pygments.token import Token >>>>>>> UPDATED ``` ```python aider/getinput.py <<<<<<< ORIGINAL def __init__(self, fnames, commands): self.commands = commands self.words = set() for fname in fnames: with open(fname, "r") as f: content = f.read() self.words.update(re.split(r"\W+", content)) ======= def __init__(self, fnames, commands): self.commands = commands self.words = set() for fname in fnames: with open(fname, "r") as f: content = f.read() lexer = guess_lexer_for_filename(fname, content) tokens = list(lexer.get_tokens(content)) self.words.update( token[1] for token in tokens if token[0] in Token.Name ) >>>>>>> UPDATED ``` These changes will use the Pygments lexer to populate `self.words` with the `Token.Name` tokens. --- aider/getinput.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/aider/getinput.py b/aider/getinput.py index cdc73e606..d2794c23f 100644 --- a/aider/getinput.py +++ b/aider/getinput.py @@ -1,6 +1,7 @@ import os -import re - +from pygments import highlight +from pygments.lexers import guess_lexer_for_filename +from pygments.token import Token from prompt_toolkit.styles import Style from prompt_toolkit import prompt @@ -21,8 +22,11 @@ class FileContentCompleter(Completer): for fname in fnames: with open(fname, "r") as f: content = f.read() - self.words.update(re.split(r"\W+", content)) - + lexer = guess_lexer_for_filename(fname, content) + tokens = list(lexer.get_tokens(content)) + self.words.update( + token[1] for token in tokens if token[0] in Token.Name + ) def get_completions(self, document, complete_event): text = document.text_before_cursor words = text.split()