Catch and report errors when scraping web pages with Playwright, without crashing the application.

2025-05-28 16:25:00 +00:00 · 2024-07-25 20:24:32 +02:00 · 2024-07-25 20:24:32 +02:00 · 5dc3bbb6fb
commit 5dc3bbb6fb
parent d0ebc7a810
1 changed files with 10 additions and 3 deletions
--- a/aider/scrape.py
+++ b/aider/scrape.py
@@ -97,7 +97,8 @@ class Scraper:
            content = self.scrape_with_httpx(url)

        if not content:
-            return
+            self.print_error(f"Failed to retrieve content from {url}")
+            return None

        self.try_pandoc()

@@ -131,8 +131,14 @@ class Scraper:
                page.goto(url, wait_until="networkidle", timeout=5000)
            except playwright._impl._errors.TimeoutError:
                pass
-            content = page.content()
-            browser.close()
+            
+            try:
+                content = page.content()
+            except playwright._impl._errors.Error as e:
+                self.print_error(f"Error retrieving page content: {str(e)}")
+                content = None
+            finally:
+                browser.close()

        return content