diff --git a/aider/commands.py b/aider/commands.py
index 2b2dcf6af..12ecd63ba 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -1,3 +1,9 @@
+from pathlib import Path
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from webdriver_manager.chrome import ChromeDriverManager
 import re
 import subprocess
 import sys
@@ -14,6 +20,7 @@ from .dump import dump  # noqa: F401
 
 class Commands:
     voice = None
+    web_driver = None
 
     def __init__(self, io, coder, voice_language=None):
         self.io = io
@@ -24,6 +31,50 @@ class Commands:
         self.voice_language = voice_language
 
         self.tokenizer = coder.main_model.tokenizer
+
+    def initialize_web_driver(self):
+        """Start the headless Chrome driver used by /web, if it is not running yet.
+
+        Called lazily from cmd_web rather than from __init__, so that creating a
+        Commands object neither launches a browser nor fails when Chrome or its
+        driver is unavailable. The driver is stored on self.web_driver.
+        """
+        if self.web_driver is not None:
+            return
+
+        import atexit
+
+        chrome_options = Options()
+        chrome_options.add_argument("--headless")
+        chrome_options.add_argument("--disable-gpu")
+        chrome_options.add_argument("--no-sandbox")
+        chrome_options.add_argument("--disable-dev-shm-usage")
+        self.web_driver = webdriver.Chrome(
+            service=Service(ChromeDriverManager().install()),
+            options=chrome_options,
+        )
+        # Without quit() the browser and driver processes outlive aider.
+        atexit.register(self.web_driver.quit)
+
+    def cmd_web(self, args):
+        "Use headless selenium to scrape a webpage and add the content to the chat"
+        url = args.strip()
+        if not url:
+            self.io.tool_error("Please provide a URL to scrape.")
+            return
+
+        try:
+            # Starting the browser can fail too (no Chrome installed, driver
+            # download blocked), so it is reported like any other scrape error.
+            self.initialize_web_driver()
+            self.web_driver.get(url)
+            page_content = self.web_driver.find_element(By.TAG_NAME, "body").text
+        except Exception as e:
+            self.io.tool_error(f"Error scraping {url}: {e}")
+            return
+
+        self.io.tool_output(f"Content from {url}:\n{page_content}")
+        return page_content
 
     def is_command(self, inp):
         return inp[0] in "/!"