From 9bd635a7a4d86a2bd307a769b7f6fa9548a79cce Mon Sep 17 00:00:00 2001
From: Paul Gauthier <paul@geomagical.com>
Date: Wed, 10 May 2023 15:26:40 -0700
Subject: [PATCH] aider: Populate `self.words` using Pygments lexer to tokenize
 the file content and add all the `Token.Name` tokens to `self.words`.

# Context:
USER: In `FileContentCompleter`, instead of populating `self.words` by splitting words with a regex, run the Pygments lexer on each file and add all the `Token.Name` tokens to `self.words`. Ask the lexer to guess the language type.
ASSISTANT: To use the Pygments lexer to populate `self.words`, you first need to install the Pygments package by adding it to your `requirements.txt` file. Then, you can use the `guess_lexer_for_filename` function to guess the lexer from the file name and the file content, and use that lexer to tokenize the content. Finally, you can keep only the tokens whose type falls under `Token.Name` and add them to `self.words`.

Here are the changes you need to make:

```python
requirements.txt
<<<<<<< ORIGINAL
=======
pygments
>>>>>>> UPDATED
```

```python
aider/getinput.py
<<<<<<< ORIGINAL
import os
import re
=======
import os
from pygments import highlight
from pygments.lexers import guess_lexer_for_filename
from pygments.token import Token
>>>>>>> UPDATED
```

```python
aider/getinput.py
<<<<<<< ORIGINAL
    def __init__(self, fnames, commands):
        self.commands = commands

        self.words = set()
        for fname in fnames:
            with open(fname, "r") as f:
                content = f.read()
            self.words.update(re.split(r"\W+", content))
=======
    def __init__(self, fnames, commands):
        self.commands = commands

        self.words = set()
        for fname in fnames:
            with open(fname, "r") as f:
                content = f.read()
            lexer = guess_lexer_for_filename(fname, content)
            tokens = list(lexer.get_tokens(content))
            self.words.update(
                token[1] for token in tokens if token[0] in Token.Name
            )
>>>>>>> UPDATED
```

These changes will use the Pygments lexer to populate `self.words` with the `Token.Name` tokens.
---
 aider/getinput.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/aider/getinput.py b/aider/getinput.py
index cdc73e606..d2794c23f 100644
--- a/aider/getinput.py
+++ b/aider/getinput.py
@@ -1,6 +1,7 @@
 import os
-import re
-
+from pygments import highlight
+from pygments.lexers import guess_lexer_for_filename
+from pygments.token import Token
 from prompt_toolkit.styles import Style
 
 from prompt_toolkit import prompt
@@ -21,8 +22,11 @@ class FileContentCompleter(Completer):
         for fname in fnames:
             with open(fname, "r") as f:
                 content = f.read()
-            self.words.update(re.split(r"\W+", content))
-
+            lexer = guess_lexer_for_filename(fname, content)
+            tokens = list(lexer.get_tokens(content))
+            self.words.update(
+                token[1] for token in tokens if token[0] in Token.Name
+            )
     def get_completions(self, document, complete_event):
         text = document.text_before_cursor
         words = text.split()