feat: Modify scrape method to only convert HTML to markdown

2025-05-31 17:55:01 +00:00 · 2024-08-10 04:55:11 -07:00 · 2024-08-10 04:55:11 -07:00 · c0982af02c
commit c0982af02c
parent f896d93b28
1 changed files with 7 additions and 5 deletions
--- a/aider/scrape.py
+++ b/aider/scrape.py
@@ -87,9 +87,10 @@ class Scraper:
    def scrape(self, url):
        """
-        Scrape a url and turn it into readable markdown.
+        Scrape a url and turn it into readable markdown if it's HTML.
+        If it's plain text or non-HTML, return it as-is.
-        `url` - the URLto scrape.
+        `url` - the URL to scrape.
        """
        if self.playwright_available:
@@ -101,9 +102,10 @@ class Scraper:
            self.print_error(f"Failed to retrieve content from {url}")
            return None
-        self.try_pandoc()
+        # Check if the content is HTML
-
+        if content.strip().startswith(('<html', '<!DOCTYPE html')):
-        content = self.html_to_markdown(content)
+            self.try_pandoc()
+            content = self.html_to_markdown(content)
        return content