Merge pull request #3381 from jkeys089/scraper-main-use-playwright

Use Playwright, if available, when invoking the scraper via the CLI
This commit is contained in:
paul-gauthier 2025-05-07 17:30:59 -07:00 committed by GitHub
commit af9fcdcfa8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 14 additions and 4 deletions

View file

@ -11,7 +11,7 @@ from aider.coders import Coder
from aider.dump import dump # noqa: F401 from aider.dump import dump # noqa: F401
from aider.io import InputOutput from aider.io import InputOutput
from aider.main import main as cli_main from aider.main import main as cli_main
from aider.scrape import Scraper from aider.scrape import Scraper, has_playwright
class CaptureIO(InputOutput): class CaptureIO(InputOutput):
@ -484,7 +484,7 @@ class GUI:
url = self.web_content url = self.web_content
if not self.state.scraper: if not self.state.scraper:
self.scraper = Scraper(print_error=self.info) self.scraper = Scraper(print_error=self.info, playwright_available=has_playwright())
content = self.scraper.scrape(url) or "" content = self.scraper.scrape(url) or ""
if content.strip(): if content.strip():

View file

@ -14,7 +14,7 @@ aider_user_agent = f"Aider/{__version__} +{urls.website}"
# platforms. # platforms.
def install_playwright(io): def check_env():
try: try:
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
@ -29,6 +29,16 @@ def install_playwright(io):
except Exception: except Exception:
has_chromium = False has_chromium = False
return has_pip, has_chromium
def has_playwright():
    """Report whether Playwright scraping looks usable in this environment.

    Unpacks the two flags returned by ``check_env()`` (``has_pip`` and
    ``has_chromium``) and yields a truthy result only when both are truthy.
    NOTE(review): presumably the flags mean "the playwright package imports"
    and "Chromium is available" -- confirm against ``check_env``.
    """
    pip_ok, chromium_ok = check_env()
    # Same short-circuit result as ``pip_ok and chromium_ok``.
    return chromium_ok if pip_ok else pip_ok
def install_playwright(io):
has_pip, has_chromium = check_env()
if has_pip and has_chromium: if has_pip and has_chromium:
return True return True
@ -262,7 +272,7 @@ def slimdown_html(soup):
def main(url): def main(url):
scraper = Scraper() scraper = Scraper(playwright_available=has_playwright())
content = scraper.scrape(url) content = scraper.scrape(url)
print(content) print(content)