This commit is contained in:
Paul Gauthier 2024-07-13 07:48:28 +01:00
parent 8948c7d47b
commit d9236d7684

View file

@ -3,21 +3,19 @@
import re import re
import sys import sys
import playwright
import pypandoc import pypandoc
from playwright.sync_api import sync_playwright
from aider import __version__, urls from aider import __version__, urls, utils
from aider.dump import dump # noqa: F401 from aider.dump import dump # noqa: F401
aider_user_agent = f"Aider/{__version__} +{urls.website}" aider_user_agent = f"Aider/{__version__} +{urls.website}"
# Playwright is nice because it has a simple way to install dependencies on most # Playwright is nice because it has a simple way to install dependencies on most
# platforms. # platforms.
PLAYWRIGHT_INFO = f""" PLAYWRIGHT_INFO = """
For better web scraping, install Playwright chromium with this command in your terminal: For better web scraping, install Playwright chromium:
playwright install --with-deps chromium {cmds}
See {urls.enable_playwright} for more info. See {urls.enable_playwright} for more info.
""" """
@ -62,6 +60,9 @@ class Scraper:
# Internals... # Internals...
def scrape_with_playwright(self, url): def scrape_with_playwright(self, url):
import playwright
from playwright.sync_api import sync_playwright
with sync_playwright() as p: with sync_playwright() as p:
try: try:
browser = p.chromium.launch() browser = p.chromium.launch()
@ -91,12 +92,33 @@ class Scraper:
if self.playwright_available is not None: if self.playwright_available is not None:
return return
try:
from playwright.sync_api import sync_playwright
has_pip = True
except ImportError:
has_pip = False
try: try:
with sync_playwright() as p: with sync_playwright() as p:
p.chromium.launch() p.chromium.launch()
self.playwright_available = True has_chromium = True
except Exception: except Exception:
self.playwright_available = False has_chromium = False
if has_pip and has_chromium:
self.playwright_available = True
pip_cmd = utils.get_pip_cmd("playwright")
chromium_cmd = "playwright install --with-deps chromium".split()
cmds = ""
if not has_pip:
cmds += " ".join(pip_cmd) + "\n"
if not has_chromium:
cmds += " ".join(chromium_cmd) + "\n"
text = PLAYWRIGHT_INFO.format(cmds=cmds)
def get_playwright_instructions(self): def get_playwright_instructions(self):
if self.playwright_available in (True, None): if self.playwright_available in (True, None):