Mirror of https://github.com/Aider-AI/aider.git, synced 2025-05-30 17:24:59 +00:00
Catch and report errors when scraping web pages with Playwright, without crashing the application.
This commit is contained in:
parent d0ebc7a810
commit 5dc3bbb6fb

1 changed file with 10 additions and 3 deletions
@@ -97,7 +97,8 @@ class Scraper:
             content = self.scrape_with_httpx(url)

         if not content:
-            return
+            self.print_error(f"Failed to retrieve content from {url}")
+            return None

         self.try_pandoc()
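The first hunk changes the failure path: instead of a bare `return`, the scraper now reports which URL failed and returns None explicitly. A minimal sketch of the resulting control flow follows, assuming a stripped-down Scraper in which scrape_with_httpx, print_error, and try_pandoc are simplified stand-ins for aider's real implementations:

```python
# Minimal sketch of the post-commit failure path; the real Scraper in aider
# also falls back to Playwright and post-processes the HTML.
import httpx  # stand-in fetch dependency for this sketch


class Scraper:
    def print_error(self, msg):
        # Stand-in for aider's error-reporting hook.
        print(f"scrape error: {msg}")

    def scrape_with_httpx(self, url):
        # Simplified fetcher: return the page text, or None on any HTTP error.
        try:
            resp = httpx.get(url, follow_redirects=True)
            resp.raise_for_status()
            return resp.text
        except httpx.HTTPError:
            return None

    def try_pandoc(self):
        # Stand-in for the pandoc availability check.
        pass

    def scrape(self, url):
        content = self.scrape_with_httpx(url)

        if not content:
            # New behavior from this commit: report the failure and
            # return None explicitly instead of a bare `return`.
            self.print_error(f"Failed to retrieve content from {url}")
            return None

        self.try_pandoc()
        return content
```

With an explicit None return, callers can check `if content is None` (or simply `if not content`) to detect a failed fetch without catching exceptions.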
@@ -130,8 +131,14 @@ class Scraper:
                 page.goto(url, wait_until="networkidle", timeout=5000)
             except playwright._impl._errors.TimeoutError:
                 pass
-            content = page.content()
-            browser.close()
+            try:
+                content = page.content()
+            except playwright._impl._errors.Error as e:
+                self.print_error(f"Error retrieving page content: {str(e)}")
+                content = None
+            finally:
+                browser.close()

         return content
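The second hunk wraps page.content() in its own try/except/finally, so a Playwright error (for example, a crashed or navigated-away page) is reported and content falls back to None, while the finally block guarantees browser.close() always runs. Below is a standalone sketch of the same pattern using Playwright's sync API; the example URL and the fetch_page_content wrapper are illustrative, and the internal playwright._impl._errors names simply mirror what the commit uses (newer releases expose the same classes publicly as playwright.sync_api.Error and playwright.sync_api.TimeoutError):

```python
# Standalone sketch of the try/except/finally pattern from the second hunk.
import playwright
from playwright.sync_api import sync_playwright


def fetch_page_content(url):
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        try:
            page.goto(url, wait_until="networkidle", timeout=5000)
        except playwright._impl._errors.TimeoutError:
            # A slow page is not fatal; scrape whatever has loaded so far.
            pass
        try:
            content = page.content()
        except playwright._impl._errors.Error as e:
            # Report the Playwright error instead of letting it crash the caller.
            print(f"Error retrieving page content: {str(e)}")
            content = None
        finally:
            # Always release the browser, even when content retrieval fails.
            browser.close()
    return content


if __name__ == "__main__":
    print(fetch_page_content("https://example.com"))
```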