mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-25 14:55:00 +00:00
aider: Add command /web to use headless selenium to scrape a webpage and add the content to the chat.
# Aider chat conversation: USER: Add a command /web to use headless selenium to scrape a webpage and add the content to the chat ASSISTANT: To implement the `/web` command, we will need to add a new method to the `Commands` class in the `aider/commands.py` file. This method will use Selenium with a headless browser to scrape the content of a given webpage and add the content to the chat. Here are the diffs for the changes: ```diff --- aider/commands.py +++ aider/commands.py @@ ... @@ from pathlib import Path +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.common.by import By +from webdriver_manager.chrome import ChromeDriverManager import re import subprocess @@ ... @@ class Commands: voice = None + web_driver = None def __init__(self, io, coder, voice_language=None): self.io = io @@ ... @@ self.voice_language = voice_language self.tokenizer = coder.main_model.tokenizer + self.initialize_web_driver() + def initialize_web_driver(self): + chrome_options = Options() + chrome_options.add_argument("--headless") + chrome_options.add_argument("--disable-gpu") + chrome_options.add_argument("--no-sandbox") + chrome_options.add_argument("--disable-dev-shm-usage") + self.web_driver = webdriver.Chrome( + service=Service(ChromeDriverManager().install()), + options=chrome_options + ) + + def cmd_web(self, args): + "Use headless selenium to scrape a webpage and add the content to the chat" + url = args.strip() + if not url: + self.io.tool_error("Please provide a URL to scrape.") + return + + try: + self.web_driver.get(url) + page_content = self.web_driver.find_element(By.TAG_NAME, "body").text + self.io.tool_output(f"Content from {url}:\n{page_content}") + return page_content + except Exception as e: + self.io.tool_error(f"Error scraping {url}: {e}") + def is_command(self, inp): return inp[0] in "/!" @@ ... 
@@ ``` This diff adds the necessary imports for Selenium and the WebDriver manager, initializes a headless Chrome WebDriver when the `Commands` class is instantiated, and adds a new command method `cmd_web` that takes a URL as an argument, scrapes the webpage, and outputs the content to the chat.
This commit is contained in:
parent
8d9c2ba60b
commit
ef12107bd7
1 changed file with 34 additions and 0 deletions
|
@ -1,3 +1,9 @@
|
|||
from pathlib import Path
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.common.by import By
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
@ -14,6 +20,7 @@ from .dump import dump # noqa: F401
|
|||
|
||||
class Commands:
|
||||
voice = None
|
||||
web_driver = None
|
||||
|
||||
def __init__(self, io, coder, voice_language=None):
|
||||
self.io = io
|
||||
|
@ -24,6 +31,33 @@ class Commands:
|
|||
|
||||
self.voice_language = voice_language
|
||||
self.tokenizer = coder.main_model.tokenizer
|
||||
self.initialize_web_driver()
|
||||
|
||||
def initialize_web_driver(self):
    """Create a headless Chrome WebDriver and store it on ``self.web_driver``."""
    # Command-line switches applied to the Chrome instance, in this order.
    chrome_flags = (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    )

    opts = Options()
    for flag in chrome_flags:
        opts.add_argument(flag)

    # Whatever ChromeDriverManager().install() returns is handed to Service
    # as the driver to launch.
    driver_service = Service(ChromeDriverManager().install())
    self.web_driver = webdriver.Chrome(service=driver_service, options=opts)
|
||||
|
||||
def cmd_web(self, args):
    "Use headless selenium to scrape a webpage and add the content to the chat"
    # NOTE(review): the docstring is deliberately left as the original
    # one-liner -- presumably it is surfaced as the command's help text;
    # confirm before expanding it.
    #
    # args: raw text following the command; expected to contain a URL.
    # Returns the text of the page's <body> element on success. Returns None
    # when no URL was given or when scraping failed; in both cases the
    # problem is reported through self.io.tool_error.

    # Treat a missing argument (None) the same as an empty one instead of
    # failing with AttributeError on .strip().
    url = (args or "").strip()
    if not url:
        self.io.tool_error("Please provide a URL to scrape.")
        return None

    try:
        # web_driver defaults to None on the class. Start the browser on
        # demand so an uninitialised driver does not surface as a cryptic
        # "'NoneType' object has no attribute 'get'"; doing it inside the
        # try block means driver start-up failures are reported as well.
        if self.web_driver is None:
            self.initialize_web_driver()

        self.web_driver.get(url)
        page_content = self.web_driver.find_element(By.TAG_NAME, "body").text
        self.io.tool_output(f"Content from {url}:\n{page_content}")
        return page_content
    except Exception as e:
        # Command boundary: report any failure to the user rather than
        # letting it propagate out of the command.
        self.io.tool_error(f"Error scraping {url}: {e}")
        return None
|
||||
|
||||
def is_command(self, inp):
|
||||
return inp[0] in "/!"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue