From 5dc3bbb6fbce1c0bb1dcb4bd785a77071bffa344 Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Thu, 25 Jul 2024 20:24:32 +0200
Subject: [PATCH] Catch and report errors when scraping web pages with Playwright, without crashing the application.

---
 aider/scrape.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/aider/scrape.py b/aider/scrape.py
index ca08b9c1f..6cdd1787a 100755
--- a/aider/scrape.py
+++ b/aider/scrape.py
@@ -97,7 +97,8 @@ class Scraper:
             content = self.scrape_with_httpx(url)
 
         if not content:
-            return
+            self.print_error(f"Failed to retrieve content from {url}")
+            return None
 
         self.try_pandoc()
 
@@ -130,8 +131,14 @@ class Scraper:
                 page.goto(url, wait_until="networkidle", timeout=5000)
             except playwright._impl._errors.TimeoutError:
                 pass
-            content = page.content()
-            browser.close()
+
+            try:
+                content = page.content()
+            except playwright._impl._errors.Error as e:
+                self.print_error(f"Error retrieving page content: {str(e)}")
+                content = None
+            finally:
+                browser.close()
 
         return content