style: format code with linter

This commit is contained in:
Paul Gauthier (aider) 2024-08-12 09:54:06 -07:00
parent 2f4dd04164
commit 5cab55c74b

View file

@@ -2,7 +2,6 @@
import re import re
import sys import sys
import re
import pypandoc import pypandoc
@@ -104,7 +103,9 @@ class Scraper:
return None return None
# Check if the content is HTML based on MIME type or content # Check if the content is HTML based on MIME type or content
if (mime_type and mime_type.startswith("text/html")) or (mime_type is None and self.looks_like_html(content)): if (mime_type and mime_type.startswith("text/html")) or (
mime_type is None and self.looks_like_html(content)
):
self.try_pandoc() self.try_pandoc()
content = self.html_to_markdown(content) content = self.html_to_markdown(content)
@@ -117,13 +118,13 @@ class Scraper:
if isinstance(content, str): if isinstance(content, str):
# Check for common HTML tags # Check for common HTML tags
html_patterns = [ html_patterns = [
r'<!DOCTYPE\s+html', r"<!DOCTYPE\s+html",
r'<html', r"<html",
r'<head', r"<head",
r'<body', r"<body",
r'<div', r"<div",
r'<p>', r"<p>",
r'<a\s+href=', r"<a\s+href=",
] ]
return any(re.search(pattern, content, re.IGNORECASE) for pattern in html_patterns) return any(re.search(pattern, content, re.IGNORECASE) for pattern in html_patterns)
return False return False