From be60b785a4e87142cc11b16ac5280b79f77325bd Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 8 Feb 2024 12:12:03 -0800 Subject: [PATCH] aider: Remove all data: URLs from slimdown_html. # Aider chat conversation: USER: in `slimdown_html` remove all data: urls ASSISTANT: ```diff --- aider/scrape.py +++ aider/scrape.py @@ ... @@ def slimdown_html(soup): # Remove all <img> tags for img in soup.find_all('img'): img.decompose() + # Remove all elements with data: URLs + for tag in soup.find_all(href=lambda x: x and x.startswith('data:')): + tag.decompose() + for tag in soup.find_all(src=lambda x: x and x.startswith('data:')): + tag.decompose() # Remove all per-element CSS styles for tag in soup.find_all(True): tag.attrs.pop('style', None) # Remove all internal anchor elements for anchor in soup.find_all('a', href=True): if anchor['href'].startswith('#'): anchor.decompose() return soup ``` --- aider/scrape.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/aider/scrape.py b/aider/scrape.py index 1cb48d052..f4c3189e8 100755 --- a/aider/scrape.py +++ b/aider/scrape.py @@ -128,6 +128,11 @@ def slimdown_html(soup): # Remove all <img> tags for img in soup.find_all('img'): img.decompose() + # Remove all elements with data: URLs + for tag in soup.find_all(href=lambda x: x and x.startswith('data:')): + tag.decompose() + for tag in soup.find_all(src=lambda x: x and x.startswith('data:')): + tag.decompose() # Remove all per-element CSS styles for tag in soup.find_all(True): tag.attrs.pop('style', None)