mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-03 19:24:59 +00:00
Handle SSL certificate errors in the Playwright-based web scraper
This commit is contained in:
parent
172af4ea57
commit
0f2aa62e80
1 changed file with 23 additions and 17 deletions
|
@@ -119,24 +119,30 @@ class Scraper:
|
||||||
self.print_error(str(e))
|
self.print_error(str(e))
|
||||||
return
|
return
|
||||||
|
|
||||||
page = browser.new_page(ignore_https_errors=not self.verify_ssl)
|
|
||||||
|
|
||||||
user_agent = page.evaluate("navigator.userAgent")
|
|
||||||
user_agent = user_agent.replace("Headless", "")
|
|
||||||
user_agent = user_agent.replace("headless", "")
|
|
||||||
user_agent += " " + aider_user_agent
|
|
||||||
|
|
||||||
page = browser.new_page(user_agent=user_agent)
|
|
||||||
try:
|
try:
|
||||||
page.goto(url, wait_until="networkidle", timeout=5000)
|
context = browser.new_context(ignore_https_errors=not self.verify_ssl)
|
||||||
except playwright._impl._errors.TimeoutError:
|
page = context.new_page()
|
||||||
pass
|
|
||||||
|
user_agent = page.evaluate("navigator.userAgent")
|
||||||
try:
|
user_agent = user_agent.replace("Headless", "")
|
||||||
content = page.content()
|
user_agent = user_agent.replace("headless", "")
|
||||||
except playwright._impl._errors.Error as e:
|
user_agent += " " + aider_user_agent
|
||||||
self.print_error(f"Error retrieving page content: {str(e)}")
|
|
||||||
content = None
|
page.set_extra_http_headers({"User-Agent": user_agent})
|
||||||
|
|
||||||
|
try:
|
||||||
|
page.goto(url, wait_until="networkidle", timeout=5000)
|
||||||
|
except playwright._impl._errors.TimeoutError:
|
||||||
|
self.print_error(f"Timeout while loading {url}")
|
||||||
|
except playwright._impl._errors.Error as e:
|
||||||
|
self.print_error(f"Error navigating to {url}: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
content = page.content()
|
||||||
|
except playwright._impl._errors.Error as e:
|
||||||
|
self.print_error(f"Error retrieving page content: {str(e)}")
|
||||||
|
content = None
|
||||||
finally:
|
finally:
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue