mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-31 09:44:59 +00:00
wip
This commit is contained in:
parent
8948c7d47b
commit
d9236d7684
1 changed file with 30 additions and 8 deletions
|
@@ -3,21 +3,19 @@
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import playwright
|
|
||||||
import pypandoc
|
import pypandoc
|
||||||
from playwright.sync_api import sync_playwright
|
|
||||||
|
|
||||||
from aider import __version__, urls
|
from aider import __version__, urls, utils
|
||||||
from aider.dump import dump # noqa: F401
|
from aider.dump import dump # noqa: F401
|
||||||
|
|
||||||
aider_user_agent = f"Aider/{__version__} +{urls.website}"
|
aider_user_agent = f"Aider/{__version__} +{urls.website}"
|
||||||
|
|
||||||
# Playwright is nice because it has a simple way to install dependencies on most
|
# Playwright is nice because it has a simple way to install dependencies on most
|
||||||
# platforms.
|
# platforms.
|
||||||
PLAYWRIGHT_INFO = f"""
|
PLAYWRIGHT_INFO = """
|
||||||
For better web scraping, install Playwright chromium with this command in your terminal:
|
For better web scraping, install Playwright chromium:
|
||||||
|
|
||||||
playwright install --with-deps chromium
|
{cmds}
|
||||||
|
|
||||||
See {urls.enable_playwright} for more info.
|
See {urls.enable_playwright} for more info.
|
||||||
"""
|
"""
|
||||||
|
@@ -62,6 +60,9 @@ class Scraper:
|
||||||
|
|
||||||
# Internals...
|
# Internals...
|
||||||
def scrape_with_playwright(self, url):
|
def scrape_with_playwright(self, url):
|
||||||
|
import playwright
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
try:
|
try:
|
||||||
browser = p.chromium.launch()
|
browser = p.chromium.launch()
|
||||||
|
@@ -91,12 +92,33 @@ class Scraper:
|
||||||
if self.playwright_available is not None:
|
if self.playwright_available is not None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
has_pip = True
|
||||||
|
except ImportError:
|
||||||
|
has_pip = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
p.chromium.launch()
|
p.chromium.launch()
|
||||||
self.playwright_available = True
|
has_chromium = True
|
||||||
except Exception:
|
except Exception:
|
||||||
self.playwright_available = False
|
has_chromium = False
|
||||||
|
|
||||||
|
if has_pip and has_chromium:
|
||||||
|
self.playwright_available = True
|
||||||
|
|
||||||
|
pip_cmd = utils.get_pip_cmd("playwright")
|
||||||
|
chromium_cmd = "playwright install --with-deps chromium".split()
|
||||||
|
|
||||||
|
cmds = ""
|
||||||
|
if not has_pip:
|
||||||
|
cmds += " ".join(pip_cmd) + "\n"
|
||||||
|
if not has_chromium:
|
||||||
|
cmds += " ".join(chromium_cmd) + "\n"
|
||||||
|
|
||||||
|
text = PLAYWRIGHT_INFO.format(cmds=cmds)
|
||||||
|
|
||||||
def get_playwright_instructions(self):
|
def get_playwright_instructions(self):
|
||||||
if self.playwright_available in (True, None):
|
if self.playwright_available in (True, None):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue