aider: Remove all data: URLs from slimdown_html.

# Aider chat conversation:

USER: in `slimdown_html` remove all data: urls

ASSISTANT: ```diff
--- aider/scrape.py
+++ aider/scrape.py
@@ ... @@
 def slimdown_html(soup):
     # Remove all <img> tags
     for img in soup.find_all('img'):
         img.decompose()
+    # Remove all elements with data: URLs
+    for tag in soup.find_all(href=lambda x: x and x.startswith('data:')):
+        tag.decompose()
+    for tag in soup.find_all(src=lambda x: x and x.startswith('data:')):
+        tag.decompose()
     # Remove all per-element CSS styles
     for tag in soup.find_all(True):
         tag.attrs.pop('style', None)
     # Remove all internal anchor elements
     for anchor in soup.find_all('a', href=True):
         if anchor['href'].startswith('#'):
             anchor.decompose()
     return soup
```
This commit is contained in:
Paul Gauthier 2024-02-08 12:12:03 -08:00
parent 84ae51c005
commit be60b785a4

View file

@@ -128,6 +128,11 @@ def slimdown_html(soup):
     # Remove all <img> tags
     for img in soup.find_all('img'):
         img.decompose()
+    # Remove all elements with data: URLs
+    for tag in soup.find_all(href=lambda x: x and x.startswith('data:')):
+        tag.decompose()
+    for tag in soup.find_all(src=lambda x: x and x.startswith('data:')):
+        tag.decompose()
     # Remove all per-element CSS styles
     for tag in soup.find_all(True):
         tag.attrs.pop('style', None)