mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-02 02:34:59 +00:00
Handle SSL certificate errors in the Playwright-based web scraper
This commit is contained in:
parent
172af4ea57
commit
0f2aa62e80
1 changed file with 23 additions and 17 deletions
|
@@ -119,24 +119,30 @@ class Scraper:
|
|||
self.print_error(str(e))
|
||||
return
|
||||
|
||||
page = browser.new_page(ignore_https_errors=not self.verify_ssl)
|
||||
|
||||
user_agent = page.evaluate("navigator.userAgent")
|
||||
user_agent = user_agent.replace("Headless", "")
|
||||
user_agent = user_agent.replace("headless", "")
|
||||
user_agent += " " + aider_user_agent
|
||||
|
||||
page = browser.new_page(user_agent=user_agent)
|
||||
try:
|
||||
page.goto(url, wait_until="networkidle", timeout=5000)
|
||||
except playwright._impl._errors.TimeoutError:
|
||||
pass
|
||||
|
||||
try:
|
||||
content = page.content()
|
||||
except playwright._impl._errors.Error as e:
|
||||
self.print_error(f"Error retrieving page content: {str(e)}")
|
||||
content = None
|
||||
context = browser.new_context(ignore_https_errors=not self.verify_ssl)
|
||||
page = context.new_page()
|
||||
|
||||
user_agent = page.evaluate("navigator.userAgent")
|
||||
user_agent = user_agent.replace("Headless", "")
|
||||
user_agent = user_agent.replace("headless", "")
|
||||
user_agent += " " + aider_user_agent
|
||||
|
||||
page.set_extra_http_headers({"User-Agent": user_agent})
|
||||
|
||||
try:
|
||||
page.goto(url, wait_until="networkidle", timeout=5000)
|
||||
except playwright._impl._errors.TimeoutError:
|
||||
self.print_error(f"Timeout while loading {url}")
|
||||
except playwright._impl._errors.Error as e:
|
||||
self.print_error(f"Error navigating to {url}: {str(e)}")
|
||||
return None
|
||||
|
||||
try:
|
||||
content = page.content()
|
||||
except playwright._impl._errors.Error as e:
|
||||
self.print_error(f"Error retrieving page content: {str(e)}")
|
||||
content = None
|
||||
finally:
|
||||
browser.close()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue