mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-20 12:24:59 +00:00
added [playwright] extra
This commit is contained in:
parent
bc1369c480
commit
4fbe3d295a
12 changed files with 91 additions and 140 deletions
|
@ -9,7 +9,7 @@ import git
|
||||||
from aider import models, prompts, voice
|
from aider import models, prompts, voice
|
||||||
from aider.help import Help, install_help_extra
|
from aider.help import Help, install_help_extra
|
||||||
from aider.llm import litellm
|
from aider.llm import litellm
|
||||||
from aider.scrape import Scraper
|
from aider.scrape import Scraper, install_playwright
|
||||||
from aider.utils import is_image_file
|
from aider.utils import is_image_file
|
||||||
|
|
||||||
from .dump import dump # noqa: F401
|
from .dump import dump # noqa: F401
|
||||||
|
@ -65,17 +65,17 @@ class Commands:
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self.scraper:
|
if not self.scraper:
|
||||||
self.scraper = Scraper(print_error=self.io.tool_error)
|
res = install_playwright(self.io)
|
||||||
|
if not res:
|
||||||
|
self.io.tool_error("Unable to initialize playwright.")
|
||||||
|
|
||||||
|
self.scraper = Scraper(print_error=self.io.tool_error, playwright_available=res)
|
||||||
|
|
||||||
content = self.scraper.scrape(url) or ""
|
content = self.scraper.scrape(url) or ""
|
||||||
# if content:
|
# if content:
|
||||||
# self.io.tool_output(content)
|
# self.io.tool_output(content)
|
||||||
|
|
||||||
instructions = self.scraper.get_playwright_instructions()
|
content = f"{url}:\n\n" + content
|
||||||
if instructions:
|
|
||||||
self.io.tool_error(instructions)
|
|
||||||
|
|
||||||
content = f"{url}:\n\n" + content # noqa: E231
|
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
|
@ -12,14 +12,59 @@ aider_user_agent = f"Aider/{__version__} +{urls.website}"
|
||||||
|
|
||||||
# Playwright is nice because it has a simple way to install dependencies on most
|
# Playwright is nice because it has a simple way to install dependencies on most
|
||||||
# platforms.
|
# platforms.
|
||||||
PLAYWRIGHT_INFO = """
|
|
||||||
For better web scraping, install Playwright chromium:
|
|
||||||
|
def install_playwright(io):
|
||||||
|
try:
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
has_pip = True
|
||||||
|
except ImportError:
|
||||||
|
has_pip = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
with sync_playwright() as p:
|
||||||
|
p.chromium.launch()
|
||||||
|
has_chromium = True
|
||||||
|
except Exception as err:
|
||||||
|
dump(err)
|
||||||
|
has_chromium = False
|
||||||
|
|
||||||
|
if has_pip and has_chromium:
|
||||||
|
return True
|
||||||
|
|
||||||
|
pip_cmd = utils.get_pip_install(["aider-chat[playwright]"])
|
||||||
|
chromium_cmd = "playwright install --with-deps chromium".split()
|
||||||
|
|
||||||
|
cmds = ""
|
||||||
|
if not has_pip:
|
||||||
|
cmds += " ".join(pip_cmd) + "\n"
|
||||||
|
if not has_chromium:
|
||||||
|
cmds += " ".join(chromium_cmd) + "\n"
|
||||||
|
|
||||||
|
text = f"""For the best web scraping, install Playwright:
|
||||||
|
|
||||||
{cmds}
|
{cmds}
|
||||||
|
|
||||||
See {urls.enable_playwright} for more info.
|
See {urls.enable_playwright} for more info.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
io.tool_error(text)
|
||||||
|
if not io.confirm_ask("Install playwright?", default="y"):
|
||||||
|
return
|
||||||
|
|
||||||
|
if not has_pip:
|
||||||
|
success, output = utils.run_install(pip_cmd)
|
||||||
|
if not success:
|
||||||
|
io.tool_error(output)
|
||||||
|
return
|
||||||
|
|
||||||
|
success, output = utils.run_install(chromium_cmd)
|
||||||
|
if not success:
|
||||||
|
io.tool_error(output)
|
||||||
|
return
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class Scraper:
|
class Scraper:
|
||||||
pandoc_available = None
|
pandoc_available = None
|
||||||
|
@ -27,7 +72,7 @@ class Scraper:
|
||||||
playwright_instructions_shown = False
|
playwright_instructions_shown = False
|
||||||
|
|
||||||
# Public API...
|
# Public API...
|
||||||
def __init__(self, print_error=None):
|
def __init__(self, print_error=None, playwright_available=None):
|
||||||
"""
|
"""
|
||||||
`print_error` - a function to call to print error/debug info.
|
`print_error` - a function to call to print error/debug info.
|
||||||
"""
|
"""
|
||||||
|
@ -36,13 +81,14 @@ class Scraper:
|
||||||
else:
|
else:
|
||||||
self.print_error = print
|
self.print_error = print
|
||||||
|
|
||||||
|
self.playwright_available = playwright_available
|
||||||
|
|
||||||
def scrape(self, url):
|
def scrape(self, url):
|
||||||
"""
|
"""
|
||||||
Scrape a url and turn it into readable markdown.
|
Scrape a url and turn it into readable markdown.
|
||||||
|
|
||||||
`url` - the URL to scrape.
|
`url` - the URL to scrape.
|
||||||
"""
|
"""
|
||||||
self.try_playwright()
|
|
||||||
|
|
||||||
if self.playwright_available:
|
if self.playwright_available:
|
||||||
content = self.scrape_with_playwright(url)
|
content = self.scrape_with_playwright(url)
|
||||||
|
@ -88,46 +134,8 @@ class Scraper:
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def try_playwright(self):
|
|
||||||
if self.playwright_available is not None:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
from playwright.sync_api import sync_playwright
|
|
||||||
|
|
||||||
has_pip = True
|
|
||||||
except ImportError:
|
|
||||||
has_pip = False
|
|
||||||
|
|
||||||
try:
|
|
||||||
with sync_playwright() as p:
|
|
||||||
p.chromium.launch()
|
|
||||||
has_chromium = True
|
|
||||||
except Exception:
|
|
||||||
has_chromium = False
|
|
||||||
|
|
||||||
if has_pip and has_chromium:
|
|
||||||
self.playwright_available = True
|
|
||||||
|
|
||||||
pip_cmd = utils.get_pip_cmd("playwright")
|
|
||||||
chromium_cmd = "playwright install --with-deps chromium".split()
|
|
||||||
|
|
||||||
cmds = ""
|
|
||||||
if not has_pip:
|
|
||||||
cmds += " ".join(pip_cmd) + "\n"
|
|
||||||
if not has_chromium:
|
|
||||||
cmds += " ".join(chromium_cmd) + "\n"
|
|
||||||
|
|
||||||
text = PLAYWRIGHT_INFO.format(cmds=cmds)
|
|
||||||
|
|
||||||
def get_playwright_instructions(self):
|
def get_playwright_instructions(self):
|
||||||
if self.playwright_available in (True, None):
|
return
|
||||||
return
|
|
||||||
if self.playwright_instructions_shown:
|
|
||||||
return
|
|
||||||
|
|
||||||
self.playwright_instructions_shown = True
|
|
||||||
return PLAYWRIGHT_INFO
|
|
||||||
|
|
||||||
def scrape_with_httpx(self, url):
|
def scrape_with_httpx(self, url):
|
||||||
import httpx
|
import httpx
|
||||||
|
|
|
@ -224,18 +224,21 @@ def run_install(cmd):
|
||||||
last_update = current_time
|
last_update = current_time
|
||||||
|
|
||||||
return_code = process.wait()
|
return_code = process.wait()
|
||||||
|
output = "".join(output)
|
||||||
|
|
||||||
|
dump(output)
|
||||||
|
|
||||||
if return_code == 0:
|
if return_code == 0:
|
||||||
print("\rInstallation complete.")
|
print("\rInstallation complete.")
|
||||||
print()
|
print()
|
||||||
return True, ''.join(output)
|
return True, output
|
||||||
|
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
print(f"\nError running pip install: {e}")
|
print(f"\nError running pip install: {e}")
|
||||||
|
|
||||||
print("\nInstallation failed.\n")
|
print("\nInstallation failed.\n")
|
||||||
|
|
||||||
return False, ''.join(output)
|
return False, output
|
||||||
|
|
||||||
|
|
||||||
def check_pip_install_extra(io, module, prompt, pip_install_cmd):
|
def check_pip_install_extra(io, module, prompt, pip_install_cmd):
|
||||||
|
@ -263,8 +266,7 @@ def check_pip_install_extra(io, module, prompt, pip_install_cmd):
|
||||||
except (ImportError, ModuleNotFoundError):
|
except (ImportError, ModuleNotFoundError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
for line in output:
|
io.tool_error(output)
|
||||||
print(line)
|
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print(f"Failed to install {pip_install_cmd[0]}")
|
print(f"Failed to install {pip_install_cmd[0]}")
|
||||||
|
|
|
@ -23,8 +23,6 @@ backoff==2.2.1
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
beautifulsoup4==4.12.3
|
beautifulsoup4==4.12.3
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
cachetools==5.3.3
|
|
||||||
# via google-auth
|
|
||||||
certifi==2024.7.4
|
certifi==2024.7.4
|
||||||
# via
|
# via
|
||||||
# httpcore
|
# httpcore
|
||||||
|
@ -60,48 +58,12 @@ gitdb==4.0.11
|
||||||
# via gitpython
|
# via gitpython
|
||||||
gitpython==3.1.43
|
gitpython==3.1.43
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
google-ai-generativelanguage==0.6.6
|
|
||||||
# via google-generativeai
|
|
||||||
google-api-core[grpc]==2.19.1
|
|
||||||
# via
|
|
||||||
# google-ai-generativelanguage
|
|
||||||
# google-api-python-client
|
|
||||||
# google-generativeai
|
|
||||||
google-api-python-client==2.136.0
|
|
||||||
# via google-generativeai
|
|
||||||
google-auth==2.31.0
|
|
||||||
# via
|
|
||||||
# google-ai-generativelanguage
|
|
||||||
# google-api-core
|
|
||||||
# google-api-python-client
|
|
||||||
# google-auth-httplib2
|
|
||||||
# google-generativeai
|
|
||||||
google-auth-httplib2==0.2.0
|
|
||||||
# via google-api-python-client
|
|
||||||
google-generativeai==0.7.1
|
|
||||||
# via -r requirements/requirements.in
|
|
||||||
googleapis-common-protos==1.63.2
|
|
||||||
# via
|
|
||||||
# google-api-core
|
|
||||||
# grpcio-status
|
|
||||||
greenlet==3.0.3
|
|
||||||
# via playwright
|
|
||||||
grep-ast==0.3.2
|
grep-ast==0.3.2
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
grpcio==1.64.1
|
|
||||||
# via
|
|
||||||
# google-api-core
|
|
||||||
# grpcio-status
|
|
||||||
grpcio-status==1.62.2
|
|
||||||
# via google-api-core
|
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
# via httpcore
|
# via httpcore
|
||||||
httpcore==1.0.5
|
httpcore==1.0.5
|
||||||
# via httpx
|
# via httpx
|
||||||
httplib2==0.22.0
|
|
||||||
# via
|
|
||||||
# google-api-python-client
|
|
||||||
# google-auth-httplib2
|
|
||||||
httpx==0.27.0
|
httpx==0.27.0
|
||||||
# via openai
|
# via openai
|
||||||
huggingface-hub==0.23.4
|
huggingface-hub==0.23.4
|
||||||
|
@ -158,49 +120,24 @@ pathspec==0.12.1
|
||||||
# grep-ast
|
# grep-ast
|
||||||
pillow==10.4.0
|
pillow==10.4.0
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
playwright==1.45.0
|
|
||||||
# via -r requirements/requirements.in
|
|
||||||
prompt-toolkit==3.0.47
|
prompt-toolkit==3.0.47
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
proto-plus==1.24.0
|
|
||||||
# via
|
|
||||||
# google-ai-generativelanguage
|
|
||||||
# google-api-core
|
|
||||||
protobuf==4.25.3
|
|
||||||
# via
|
|
||||||
# google-ai-generativelanguage
|
|
||||||
# google-api-core
|
|
||||||
# google-generativeai
|
|
||||||
# googleapis-common-protos
|
|
||||||
# grpcio-status
|
|
||||||
# proto-plus
|
|
||||||
pyasn1==0.6.0
|
|
||||||
# via
|
|
||||||
# pyasn1-modules
|
|
||||||
# rsa
|
|
||||||
pyasn1-modules==0.4.0
|
|
||||||
# via google-auth
|
|
||||||
pycodestyle==2.12.0
|
pycodestyle==2.12.0
|
||||||
# via flake8
|
# via flake8
|
||||||
pycparser==2.22
|
pycparser==2.22
|
||||||
# via cffi
|
# via cffi
|
||||||
pydantic==2.8.2
|
pydantic==2.8.2
|
||||||
# via
|
# via
|
||||||
# google-generativeai
|
|
||||||
# litellm
|
# litellm
|
||||||
# openai
|
# openai
|
||||||
pydantic-core==2.20.1
|
pydantic-core==2.20.1
|
||||||
# via pydantic
|
# via pydantic
|
||||||
pyee==11.1.0
|
|
||||||
# via playwright
|
|
||||||
pyflakes==3.2.0
|
pyflakes==3.2.0
|
||||||
# via flake8
|
# via flake8
|
||||||
pygments==2.18.0
|
pygments==2.18.0
|
||||||
# via rich
|
# via rich
|
||||||
pypandoc==1.13
|
pypandoc==1.13
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
pyparsing==3.1.2
|
|
||||||
# via httplib2
|
|
||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
# via litellm
|
# via litellm
|
||||||
pyyaml==6.0.1
|
pyyaml==6.0.1
|
||||||
|
@ -215,7 +152,6 @@ regex==2024.5.15
|
||||||
# via tiktoken
|
# via tiktoken
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
# via
|
# via
|
||||||
# google-api-core
|
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
# litellm
|
# litellm
|
||||||
# tiktoken
|
# tiktoken
|
||||||
|
@ -225,8 +161,6 @@ rpds-py==0.18.1
|
||||||
# via
|
# via
|
||||||
# jsonschema
|
# jsonschema
|
||||||
# referencing
|
# referencing
|
||||||
rsa==4.9
|
|
||||||
# via google-auth
|
|
||||||
scipy==1.13.1
|
scipy==1.13.1
|
||||||
# via -r requirements/requirements.in
|
# via -r requirements/requirements.in
|
||||||
smmap==5.0.1
|
smmap==5.0.1
|
||||||
|
@ -248,7 +182,6 @@ tokenizers==0.19.1
|
||||||
# via litellm
|
# via litellm
|
||||||
tqdm==4.66.4
|
tqdm==4.66.4
|
||||||
# via
|
# via
|
||||||
# google-generativeai
|
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
# openai
|
# openai
|
||||||
tree-sitter==0.21.3
|
tree-sitter==0.21.3
|
||||||
|
@ -259,14 +192,10 @@ tree-sitter-languages==1.10.2
|
||||||
# via grep-ast
|
# via grep-ast
|
||||||
typing-extensions==4.12.2
|
typing-extensions==4.12.2
|
||||||
# via
|
# via
|
||||||
# google-generativeai
|
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
# openai
|
# openai
|
||||||
# pydantic
|
# pydantic
|
||||||
# pydantic-core
|
# pydantic-core
|
||||||
# pyee
|
|
||||||
uritemplate==4.1.1
|
|
||||||
# via google-api-python-client
|
|
||||||
urllib3==2.2.2
|
urllib3==2.2.2
|
||||||
# via requests
|
# via requests
|
||||||
wcwidth==0.2.13
|
wcwidth==0.2.13
|
||||||
|
|
|
@ -14,9 +14,7 @@ attrs==23.2.0
|
||||||
blinker==1.8.2
|
blinker==1.8.2
|
||||||
# via streamlit
|
# via streamlit
|
||||||
cachetools==5.3.3
|
cachetools==5.3.3
|
||||||
# via
|
# via streamlit
|
||||||
# -c requirements/../requirements.txt
|
|
||||||
# streamlit
|
|
||||||
certifi==2024.7.4
|
certifi==2024.7.4
|
||||||
# via
|
# via
|
||||||
# -c requirements/../requirements.txt
|
# -c requirements/../requirements.txt
|
||||||
|
@ -88,9 +86,7 @@ pillow==10.4.0
|
||||||
# -c requirements/../requirements.txt
|
# -c requirements/../requirements.txt
|
||||||
# streamlit
|
# streamlit
|
||||||
protobuf==4.25.3
|
protobuf==4.25.3
|
||||||
# via
|
# via streamlit
|
||||||
# -c requirements/../requirements.txt
|
|
||||||
# streamlit
|
|
||||||
pyarrow==16.1.0
|
pyarrow==16.1.0
|
||||||
# via streamlit
|
# via streamlit
|
||||||
pydeck==0.9.1
|
pydeck==0.9.1
|
||||||
|
|
|
@ -124,9 +124,7 @@ pygments==2.18.0
|
||||||
# rich
|
# rich
|
||||||
# sphinx
|
# sphinx
|
||||||
pyparsing==3.1.2
|
pyparsing==3.1.2
|
||||||
# via
|
# via matplotlib
|
||||||
# -c requirements/../requirements.txt
|
|
||||||
# matplotlib
|
|
||||||
pyproject-hooks==1.1.0
|
pyproject-hooks==1.1.0
|
||||||
# via
|
# via
|
||||||
# build
|
# build
|
||||||
|
|
|
@ -68,9 +68,7 @@ fsspec==2024.6.1
|
||||||
# llama-index-core
|
# llama-index-core
|
||||||
# torch
|
# torch
|
||||||
greenlet==3.0.3
|
greenlet==3.0.3
|
||||||
# via
|
# via sqlalchemy
|
||||||
# -c requirements/../requirements.txt
|
|
||||||
# sqlalchemy
|
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
# via
|
# via
|
||||||
# -c requirements/../requirements.txt
|
# -c requirements/../requirements.txt
|
||||||
|
|
3
requirements/requirements-playwright.in
Normal file
3
requirements/requirements-playwright.in
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
-c ../requirements.txt
|
||||||
|
|
||||||
|
playwright
|
16
requirements/requirements-playwright.txt
Normal file
16
requirements/requirements-playwright.txt
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
#
|
||||||
|
# This file is autogenerated by pip-compile with Python 3.12
|
||||||
|
# by the following command:
|
||||||
|
#
|
||||||
|
# pip-compile --output-file=requirements/requirements-playwright.txt requirements/requirements-playwright.in
|
||||||
|
#
|
||||||
|
greenlet==3.0.3
|
||||||
|
# via playwright
|
||||||
|
playwright==1.45.0
|
||||||
|
# via -r requirements/requirements-playwright.in
|
||||||
|
pyee==11.1.0
|
||||||
|
# via playwright
|
||||||
|
typing-extensions==4.12.2
|
||||||
|
# via
|
||||||
|
# -c requirements/../requirements.txt
|
||||||
|
# pyee
|
|
@ -19,7 +19,6 @@ beautifulsoup4
|
||||||
PyYAML
|
PyYAML
|
||||||
Pillow
|
Pillow
|
||||||
diff-match-patch
|
diff-match-patch
|
||||||
playwright
|
|
||||||
pypandoc
|
pypandoc
|
||||||
litellm
|
litellm
|
||||||
flake8
|
flake8
|
||||||
|
|
|
@ -8,7 +8,7 @@ pip-compile \
|
||||||
--output-file=requirements.txt \
|
--output-file=requirements.txt \
|
||||||
$1
|
$1
|
||||||
|
|
||||||
for SUFFIX in dev hf-embed browser; do
|
for SUFFIX in dev hf-embed browser playwright; do
|
||||||
|
|
||||||
pip-compile \
|
pip-compile \
|
||||||
requirements/requirements-${SUFFIX}.in \
|
requirements/requirements-${SUFFIX}.in \
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -23,6 +23,7 @@ requirements = get_requirements()
|
||||||
dev_requirements = get_requirements("dev")
|
dev_requirements = get_requirements("dev")
|
||||||
hf_requirements = get_requirements("hf-embed")
|
hf_requirements = get_requirements("hf-embed")
|
||||||
browser_requirements = get_requirements("browser")
|
browser_requirements = get_requirements("browser")
|
||||||
|
playwright_requirements = get_requirements("playwright")
|
||||||
|
|
||||||
# README
|
# README
|
||||||
with open("README.md", "r", encoding="utf-8") as f:
|
with open("README.md", "r", encoding="utf-8") as f:
|
||||||
|
@ -49,6 +50,7 @@ setup(
|
||||||
"dev": dev_requirements,
|
"dev": dev_requirements,
|
||||||
"hf-embed": hf_requirements,
|
"hf-embed": hf_requirements,
|
||||||
"browser": browser_requirements,
|
"browser": browser_requirements,
|
||||||
|
"playwright": playwright_requirements,
|
||||||
},
|
},
|
||||||
python_requires=">=3.9,<3.13",
|
python_requires=">=3.9,<3.13",
|
||||||
entry_points={
|
entry_points={
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue