mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 00:35:00 +00:00
wip
This commit is contained in:
parent
61531b9430
commit
1758937042
3 changed files with 157 additions and 91 deletions
|
@ -1,9 +1,3 @@
|
|||
from pathlib import Path
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from selenium.webdriver.common.by import By
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
@ -13,7 +7,7 @@ import git
|
|||
from prompt_toolkit.completion import Completion
|
||||
|
||||
from aider import prompts, voice
|
||||
from aider.utils import is_gpt4_with_openai_base_url, is_image_file
|
||||
from aider.utils import is_gpt4_with_openai_base_url, is_image_file, scrape
|
||||
|
||||
from .dump import dump # noqa: F401
|
||||
|
||||
|
@ -31,18 +25,6 @@ class Commands:
|
|||
|
||||
self.voice_language = voice_language
|
||||
self.tokenizer = coder.main_model.tokenizer
|
||||
self.initialize_web_driver()
|
||||
|
||||
def initialize_web_driver(self):
|
||||
chrome_options = Options()
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("--disable-gpu")
|
||||
chrome_options.add_argument("--no-sandbox")
|
||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||
self.web_driver = webdriver.Chrome(
|
||||
service=Service(ChromeDriverManager().install()),
|
||||
options=chrome_options
|
||||
)
|
||||
|
||||
def cmd_web(self, args):
|
||||
"Use headless selenium to scrape a webpage and add the content to the chat"
|
||||
|
@ -51,13 +33,9 @@ class Commands:
|
|||
self.io.tool_error("Please provide a URL to scrape.")
|
||||
return
|
||||
|
||||
try:
|
||||
self.web_driver.get(url)
|
||||
page_content = self.web_driver.find_element(By.TAG_NAME, "body").text
|
||||
self.io.tool_output(f"Content from {url}:\n{page_content}")
|
||||
return page_content
|
||||
except Exception as e:
|
||||
self.io.tool_error(f"Error scraping {url}: {e}")
|
||||
content = scrape(url)
|
||||
print(content)
|
||||
return content
|
||||
|
||||
def is_command(self, inp):
    """Return True when ``inp`` begins with a command prefix (``/`` or ``!``).

    Empty input is not a command; it returns False rather than raising
    IndexError on the first-character lookup.
    """
    return bool(inp) and inp[0] in "/!"
|
||||
|
|
132
aider/utils.py
132
aider/utils.py
|
@ -1,9 +1,32 @@
|
|||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import git
|
||||
from typing import Type
|
||||
|
||||
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'}
|
||||
import git
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium.common.exceptions import WebDriverException
|
||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||
from selenium.webdriver.chrome.service import Service as ChromeDriverService
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.options import ArgOptions as BrowserOptions
|
||||
from selenium.webdriver.edge.options import Options as EdgeOptions
|
||||
from selenium.webdriver.edge.service import Service as EdgeDriverService
|
||||
from selenium.webdriver.edge.webdriver import WebDriver as EdgeDriver
|
||||
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
||||
from selenium.webdriver.firefox.service import Service as GeckoDriverService
|
||||
from selenium.webdriver.firefox.webdriver import WebDriver as FirefoxDriver
|
||||
from selenium.webdriver.remote.webdriver import WebDriver
|
||||
from selenium.webdriver.safari.options import Options as SafariOptions
|
||||
from selenium.webdriver.safari.webdriver import WebDriver as SafariDriver
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from webdriver_manager.firefox import GeckoDriverManager
|
||||
from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager
|
||||
|
||||
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}
|
||||
|
||||
from aider.dump import dump # noqa: F401
|
||||
|
||||
|
@ -65,6 +88,7 @@ def make_repo(path=None):
|
|||
|
||||
return repo
|
||||
|
||||
|
||||
def is_image_file(file_name):
|
||||
"""
|
||||
Check if the given file name has an image file extension.
|
||||
|
@ -103,6 +127,7 @@ def show_messages(messages, title=None, functions=None):
|
|||
if functions:
|
||||
dump(functions)
|
||||
|
||||
|
||||
def is_gpt4_with_openai_base_url(model_name, client):
|
||||
"""
|
||||
Check if the model_name starts with 'gpt-4' and the client base URL includes 'api.openai.com'.
|
||||
|
@ -111,6 +136,107 @@ def is_gpt4_with_openai_base_url(model_name, client):
|
|||
:param client: The OpenAI client instance.
|
||||
:return: True if conditions are met, False otherwise.
|
||||
"""
|
||||
if client is None or not hasattr(client, 'base_url'):
|
||||
if client is None or not hasattr(client, "base_url"):
|
||||
return False
|
||||
return model_name.startswith("gpt-4") and "api.openai.com" in client.base_url.host
|
||||
|
||||
|
||||
# Taken from AutoGPT, MIT License
|
||||
def open_page_in_browser(
    url: str,
    selenium_web_browser="chrome",
    selenium_headless=True,
    platform="linux",
    user_agent="Aider CLI 0.23.0",
) -> WebDriver:
    """Open a browser window and load a web page using Selenium.

    Params:
        url (str): The URL of the page to load
        selenium_web_browser (str): One of "chrome", "edge", "firefox", "safari"
        selenium_headless (bool): Run the browser without a visible window
            (applied to chrome, edge and firefox; safari has no such switch here)
        platform (str): Platform identifier; "linux"/"linux2" enables extra
            Chrome flags
        user_agent (str): User-agent string passed to the browser

    Returns:
        driver (WebDriver): A driver object representing the browser window to scrape

    Raises:
        KeyError: If ``selenium_web_browser`` is not one of the supported names.
    """
    options_available: dict[str, Type[BrowserOptions]] = {
        "chrome": ChromeOptions,
        "edge": EdgeOptions,
        "firefox": FirefoxOptions,
        "safari": SafariOptions,
    }

    options: BrowserOptions = options_available[selenium_web_browser]()
    options.add_argument(f"user-agent={user_agent}")

    if selenium_web_browser == "firefox":
        if selenium_headless:
            # The `options.headless` setter no longer exists in current
            # Selenium 4 releases; assigning it is a silent no-op there.
            # The command-line switch works on every version.
            options.add_argument("-headless")
            options.add_argument("--disable-gpu")
        driver = FirefoxDriver(
            service=GeckoDriverService(GeckoDriverManager().install()), options=options
        )
    elif selenium_web_browser == "edge":
        if selenium_headless:
            # Edge is Chromium-based and takes the same headless switch as Chrome.
            options.add_argument("--headless=new")
            options.add_argument("--disable-gpu")
        driver = EdgeDriver(
            service=EdgeDriverService(EdgeDriverManager().install()), options=options
        )
    elif selenium_web_browser == "safari":
        # Requires a bit more setup on the users end.
        # See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari  # noqa: E501
        driver = SafariDriver(options=options)
    else:
        if platform in ("linux", "linux2"):
            options.add_argument("--disable-dev-shm-usage")
            options.add_argument("--remote-debugging-port=9222")

        options.add_argument("--no-sandbox")
        if selenium_headless:
            options.add_argument("--headless=new")
            options.add_argument("--disable-gpu")

        # Prefer a system-installed chromedriver; otherwise download one.
        chromium_driver_path = Path("/usr/bin/chromedriver")

        driver = ChromeDriver(
            service=(
                ChromeDriverService(str(chromium_driver_path))
                if chromium_driver_path.exists()
                else ChromeDriverService(ChromeDriverManager().install())
            ),
            options=options,
        )

    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
    except Exception:
        # The caller never receives the driver on failure, so shut the
        # browser down here instead of leaking the process, then re-raise.
        driver.quit()
        raise

    return driver
|
||||
|
||||
|
||||
# Taken from AutoGPT, MIT License
|
||||
def scrape_text_with_selenium(driver: WebDriver) -> str:
    """Scrape text from a browser window using selenium

    Args:
        driver (WebDriver): A driver object representing the browser window to scrape

    Returns:
        str: the visible text of the page, one non-empty chunk per line
    """

    # Get the HTML content directly from the browser's DOM
    page_source = driver.execute_script("return document.body.outerHTML;")
    # A document without a <body> yields None; treat that as an empty page.
    soup = BeautifulSoup(page_source or "", "html.parser")

    # Script and style contents are not human-readable text; drop them.
    for script in soup(["script", "style"]):
        script.extract()

    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    # Break on runs of two spaces (layout gaps), not single spaces, so
    # ordinary sentences stay on one line instead of one word per line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text
|
||||
|
||||
|
||||
def scrape(url: str):
    """Load ``url`` in a browser and return the page's text content.

    The browser is always shut down, even if extracting the text raises,
    so a failed scrape does not leave a driver process behind.
    """
    driver = open_page_in_browser(url)
    try:
        return scrape_text_with_selenium(driver)
    finally:
        driver.quit()
|
||||
|
|
|
@ -17,15 +17,11 @@ attrs==23.2.0
|
|||
# referencing
|
||||
# trio
|
||||
backoff==2.2.1
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
beautifulsoup4==4.12.3
|
||||
# via bs4
|
||||
bs4==0.0.2
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
certifi==2023.11.17
|
||||
# via
|
||||
# httpcore
|
||||
|
@ -38,30 +34,22 @@ cffi==1.16.0
|
|||
# soundfile
|
||||
charset-normalizer==3.3.2
|
||||
# via requests
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
configargparse==1.7
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
diff-match-patch==20230430
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
diskcache==5.6.3
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
distro==1.9.0
|
||||
# via openai
|
||||
gitdb==4.0.11
|
||||
# via gitpython
|
||||
gitpython==3.1.40
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
grep-ast==0.2.4
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
h11==0.14.0
|
||||
# via
|
||||
# httpcore
|
||||
|
@ -77,9 +65,7 @@ idna==3.6
|
|||
# requests
|
||||
# trio
|
||||
jsonschema==4.20.0
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
jsonschema-specifications==2023.12.1
|
||||
# via jsonschema
|
||||
markdown-it-py==3.0.0
|
||||
|
@ -87,17 +73,13 @@ markdown-it-py==3.0.0
|
|||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
networkx==3.2.1
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
numpy==1.26.3
|
||||
# via
|
||||
# -r requirements.in
|
||||
# scipy
|
||||
openai==1.6.1
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
outcome==1.3.0.post0
|
||||
# via trio
|
||||
packaging==23.2
|
||||
|
@ -109,13 +91,9 @@ pathspec==0.12.1
|
|||
# -r requirements.in
|
||||
# grep-ast
|
||||
pillow==10.2.0
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
prompt-toolkit==3.0.43
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
pycparser==2.21
|
||||
# via cffi
|
||||
pydantic==2.5.3
|
||||
|
@ -129,9 +107,7 @@ pysocks==1.7.1
|
|||
python-dotenv==1.0.1
|
||||
# via webdriver-manager
|
||||
pyyaml==6.0.1
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
referencing==0.32.0
|
||||
# via
|
||||
# jsonschema
|
||||
|
@ -143,21 +119,15 @@ requests==2.31.0
|
|||
# tiktoken
|
||||
# webdriver-manager
|
||||
rich==13.7.0
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
rpds-py==0.16.2
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
scipy==1.11.4
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
selenium==4.17.2
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
smmap==5.0.1
|
||||
# via gitdb
|
||||
sniffio==1.3.0
|
||||
|
@ -169,19 +139,13 @@ sniffio==1.3.0
|
|||
sortedcontainers==2.4.0
|
||||
# via trio
|
||||
sounddevice==0.4.6
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
soundfile==0.12.1
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
soupsieve==2.5
|
||||
# via beautifulsoup4
|
||||
tiktoken==0.5.2
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
tqdm==4.66.1
|
||||
# via openai
|
||||
tree-sitter==0.20.4
|
||||
|
@ -208,8 +172,6 @@ urllib3[socks]==2.1.0
|
|||
wcwidth==0.2.12
|
||||
# via prompt-toolkit
|
||||
webdriver-manager==4.0.1
|
||||
selenium==4.17.2
|
||||
chromedriver-autoinstaller==0.3.1
|
||||
# via -r requirements.in
|
||||
# via -r requirements.in
|
||||
wsproto==1.2.0
|
||||
# via trio-websocket
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue