mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-13 08:05:01 +00:00
added [playwright] extra
This commit is contained in:
parent
bc1369c480
commit
4fbe3d295a
12 changed files with 91 additions and 140 deletions
|
@ -9,7 +9,7 @@ import git
|
|||
from aider import models, prompts, voice
|
||||
from aider.help import Help, install_help_extra
|
||||
from aider.llm import litellm
|
||||
from aider.scrape import Scraper
|
||||
from aider.scrape import Scraper, install_playwright
|
||||
from aider.utils import is_image_file
|
||||
|
||||
from .dump import dump # noqa: F401
|
||||
|
@ -65,17 +65,17 @@ class Commands:
|
|||
return
|
||||
|
||||
if not self.scraper:
|
||||
self.scraper = Scraper(print_error=self.io.tool_error)
|
||||
res = install_playwright(self.io)
|
||||
if not res:
|
||||
self.io.tool_error("Unable to initialize playwright.")
|
||||
|
||||
self.scraper = Scraper(print_error=self.io.tool_error, playwright_available=res)
|
||||
|
||||
content = self.scraper.scrape(url) or ""
|
||||
# if content:
|
||||
# self.io.tool_output(content)
|
||||
|
||||
instructions = self.scraper.get_playwright_instructions()
|
||||
if instructions:
|
||||
self.io.tool_error(instructions)
|
||||
|
||||
content = f"{url}:\n\n" + content # noqa: E231
|
||||
content = f"{url}:\n\n" + content
|
||||
|
||||
return content
|
||||
|
||||
|
|
|
@ -12,14 +12,59 @@ aider_user_agent = f"Aider/{__version__} +{urls.website}"
|
|||
|
||||
# Playwright is nice because it has a simple way to install dependencies on most
|
||||
# platforms.
|
||||
PLAYWRIGHT_INFO = """
|
||||
For better web scraping, install Playwright chromium:
|
||||
|
||||
|
||||
def _probe_playwright():
    """Report whether Playwright and its Chromium browser are usable.

    Returns a ``(has_pip, has_chromium)`` tuple of booleans: whether the
    ``playwright`` package can be imported, and whether Chromium could be
    launched through it.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        # Without the package there is nothing to launch Chromium with, so
        # report both as missing instead of relying on a swallowed NameError.
        return False, False

    try:
        with sync_playwright() as p:
            p.chromium.launch()
    except Exception:
        # This is only a probe: any failure to start the browser is treated
        # as "Chromium is not installed/usable" so the caller offers to
        # install it.
        return True, False

    return True, True


def install_playwright(io):
    """Ensure Playwright and Chromium are available, offering to install them.

    `io` - object used to talk to the user; this function calls
    `io.tool_error(text)` to show messages and
    `io.confirm_ask(question, default=...)` to ask for permission.

    Returns True when Playwright and Chromium were already usable, or when
    every install command that was run reported success. Returns None when
    the user declines the install or an install command fails (the command
    output is passed to `io.tool_error` in that case).
    """
    has_pip, has_chromium = _probe_playwright()
    if has_pip and has_chromium:
        return True

    pip_cmd = utils.get_pip_install(["aider-chat[playwright]"])
    chromium_cmd = "playwright install --with-deps chromium".split()

    # Only list the commands for the pieces that are actually missing.
    missing_cmds = []
    if not has_pip:
        missing_cmds.append(pip_cmd)
    if not has_chromium:
        missing_cmds.append(chromium_cmd)
    cmds = "".join(" ".join(cmd) + "\n" for cmd in missing_cmds)

    text = f"""For the best web scraping, install Playwright:

{cmds}

See {urls.enable_playwright} for more info.
"""

    io.tool_error(text)
    if not io.confirm_ask("Install playwright?", default="y"):
        return None

    if not has_pip:
        success, output = utils.run_install(pip_cmd)
        if not success:
            io.tool_error(output)
            return None

    # A missing package implies Chromium could not be probed either, so the
    # browser install always runs once this point is reached.
    success, output = utils.run_install(chromium_cmd)
    if not success:
        io.tool_error(output)
        return None

    return True
|
||||
|
||||
|
||||
class Scraper:
|
||||
pandoc_available = None
|
||||
|
@ -27,7 +72,7 @@ class Scraper:
|
|||
playwright_instructions_shown = False
|
||||
|
||||
# Public API...
|
||||
def __init__(self, print_error=None):
|
||||
def __init__(self, print_error=None, playwright_available=None):
|
||||
"""
|
||||
`print_error` - a function to call to print error/debug info.
|
||||
"""
|
||||
|
@ -36,13 +81,14 @@ class Scraper:
|
|||
else:
|
||||
self.print_error = print
|
||||
|
||||
self.playwright_available = playwright_available
|
||||
|
||||
def scrape(self, url):
|
||||
"""
|
||||
Scrape a url and turn it into readable markdown.
|
||||
|
||||
`url` - the URL to scrape.
|
||||
"""
|
||||
self.try_playwright()
|
||||
|
||||
if self.playwright_available:
|
||||
content = self.scrape_with_playwright(url)
|
||||
|
@ -88,46 +134,8 @@ class Scraper:
|
|||
|
||||
return content
|
||||
|
||||
def try_playwright(self):
|
||||
if self.playwright_available is not None:
|
||||
return
|
||||
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
has_pip = True
|
||||
except ImportError:
|
||||
has_pip = False
|
||||
|
||||
try:
|
||||
with sync_playwright() as p:
|
||||
p.chromium.launch()
|
||||
has_chromium = True
|
||||
except Exception:
|
||||
has_chromium = False
|
||||
|
||||
if has_pip and has_chromium:
|
||||
self.playwright_available = True
|
||||
|
||||
pip_cmd = utils.get_pip_cmd("playwright")
|
||||
chromium_cmd = "playwright install --with-deps chromium".split()
|
||||
|
||||
cmds = ""
|
||||
if not has_pip:
|
||||
cmds += " ".join(pip_cmd) + "\n"
|
||||
if not has_chromium:
|
||||
cmds += " ".join(chromium_cmd) + "\n"
|
||||
|
||||
text = PLAYWRIGHT_INFO.format(cmds=cmds)
|
||||
|
||||
def get_playwright_instructions(self):
|
||||
if self.playwright_available in (True, None):
|
||||
return
|
||||
if self.playwright_instructions_shown:
|
||||
return
|
||||
|
||||
self.playwright_instructions_shown = True
|
||||
return PLAYWRIGHT_INFO
|
||||
return
|
||||
|
||||
def scrape_with_httpx(self, url):
|
||||
import httpx
|
||||
|
|
|
@ -224,18 +224,21 @@ def run_install(cmd):
|
|||
last_update = current_time
|
||||
|
||||
return_code = process.wait()
|
||||
output = "".join(output)
|
||||
|
||||
dump(output)
|
||||
|
||||
if return_code == 0:
|
||||
print("\rInstallation complete.")
|
||||
print()
|
||||
return True, ''.join(output)
|
||||
return True, output
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"\nError running pip install: {e}")
|
||||
|
||||
print("\nInstallation failed.\n")
|
||||
|
||||
return False, ''.join(output)
|
||||
return False, output
|
||||
|
||||
|
||||
def check_pip_install_extra(io, module, prompt, pip_install_cmd):
|
||||
|
@ -263,8 +266,7 @@ def check_pip_install_extra(io, module, prompt, pip_install_cmd):
|
|||
except (ImportError, ModuleNotFoundError):
|
||||
pass
|
||||
|
||||
for line in output:
|
||||
print(line)
|
||||
io.tool_error(output)
|
||||
|
||||
print()
|
||||
print(f"Failed to install {pip_install_cmd[0]}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue