aider: Add command /web to use headless selenium to scrape a webpage and add the content to the chat.

# Aider chat conversation: USER: Add a command /web to use headless selenium to scrape a webpage and add the content to the chat ASSISTANT: To implement the `/web` command, we will need to add a new method to the `Commands` class in the `aider/commands.py` file. This method will use Selenium with a headless browser to scrape the content of a given webpage and add the content to the chat. Here are the diffs for the changes: ```diff --- aider/commands.py +++ aider/commands.py @@ ... @@ from pathlib import Path +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.common.by import By +from webdriver_manager.chrome import ChromeDriverManager import re import subprocess @@ ... @@ class Commands: voice = None + web_driver = None def __init__(self, io, coder, voice_language=None): self.io = io @@ ... @@ self.voice_language = voice_language self.tokenizer = coder.main_model.tokenizer + self.initialize_web_driver() + def initialize_web_driver(self): + chrome_options = Options() + chrome_options.add_argument("--headless") + chrome_options.add_argument("--disable-gpu") + chrome_options.add_argument("--no-sandbox") + chrome_options.add_argument("--disable-dev-shm-usage") + self.web_driver = webdriver.Chrome( + service=Service(ChromeDriverManager().install()), + options=chrome_options + ) + + def cmd_web(self, args): + "Use headless selenium to scrape a webpage and add the content to the chat" + url = args.strip() + if not url: + self.io.tool_error("Please provide a URL to scrape.") + return + + try: + self.web_driver.get(url) + page_content = self.web_driver.find_element(By.TAG_NAME, "body").text + self.io.tool_output(f"Content from {url}:\n{page_content}") + return page_content + except Exception as e: + self.io.tool_error(f"Error scraping {url}: {e}") + def is_command(self, inp): return inp[0] in "/!" @@ ... @@ ``` This diff adds the necessary imports for Selenium and the WebDriver manager, initializes a headless Chrome WebDriver when the `Commands` class is instantiated, and adds a new command method `cmd_web` that takes a URL as an argument, scrapes the webpage, and outputs the content to the chat.
2025-05-25 14:55:00 +00:00 · 2024-02-07 14:22:02 -08:00 · 2024-02-07 14:22:02 -08:00 · ef12107bd7
commit ef12107bd7
parent 8d9c2ba60b
1 changed files with 34 additions and 0 deletions
--- a/aider/commands.py
+++ b/aider/commands.py
@ -1,3 +1,9 @@
+from pathlib import Path
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from webdriver_manager.chrome import ChromeDriverManager
 import re
 import subprocess
 import sys
@ -14,6 +20,7 @@ from .dump import dump  # noqa: F401

 class Commands:
    voice = None
+    web_driver = None

    def __init__(self, io, coder, voice_language=None):
        self.io = io
@ -24,6 +31,33 @@ class Commands:

        self.voice_language = voice_language
        self.tokenizer = coder.main_model.tokenizer
+        self.initialize_web_driver()
+
+    def initialize_web_driver(self):
+        chrome_options = Options()
+        chrome_options.add_argument("--headless")
+        chrome_options.add_argument("--disable-gpu")
+        chrome_options.add_argument("--no-sandbox")
+        chrome_options.add_argument("--disable-dev-shm-usage")
+        self.web_driver = webdriver.Chrome(
+            service=Service(ChromeDriverManager().install()),
+            options=chrome_options
+        )
+
+    def cmd_web(self, args):
+        "Use headless selenium to scrape a webpage and add the content to the chat"
+        url = args.strip()
+        if not url:
+            self.io.tool_error("Please provide a URL to scrape.")
+            return
+
+        try:
+            self.web_driver.get(url)
+            page_content = self.web_driver.find_element(By.TAG_NAME, "body").text
+            self.io.tool_output(f"Content from {url}:\n{page_content}")
+            return page_content
+        except Exception as e:
+            self.io.tool_error(f"Error scraping {url}: {e}")

    def is_command(self, inp):
        return inp[0] in "/!"