From c0982af02c82bb7c33d632a0fce622b135b02226 Mon Sep 17 00:00:00 2001 From: "Paul Gauthier (aider)" Date: Sat, 10 Aug 2024 04:55:11 -0700 Subject: [PATCH] feat: Modify scrape method to only convert HTML to markdown --- aider/scrape.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/aider/scrape.py b/aider/scrape.py index 7d3bed945..2fbbd35a4 100755 --- a/aider/scrape.py +++ b/aider/scrape.py @@ -87,9 +87,10 @@ class Scraper: def scrape(self, url): """ - Scrape a url and turn it into readable markdown. + Scrape a url and turn it into readable markdown if it's HTML. + If it's plain text or non-HTML, return it as-is. - `url` - the URLto scrape. + `url` - the URL to scrape. """ if self.playwright_available: @@ -101,9 +102,10 @@ class Scraper: self.print_error(f"Failed to retrieve content from {url}") return None - self.try_pandoc() - - content = self.html_to_markdown(content) + # Check if the content is HTML + if content.strip().startswith(('