mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 08:44:59 +00:00
Change html_to_text to html_to_markdown and add slimdown_html function.
This commit is contained in:
parent
1509e21698
commit
1520bb976a
1 changed file with 7 additions and 2 deletions
|
@@ -95,7 +95,8 @@ class Scraper:
|
|||
content = self.scrape_with_httpx(url)
|
||||
|
||||
if content:
|
||||
content = html_to_text(content)
|
||||
content = html_to_markdown(content)
|
||||
#content = html_to_text(content)
|
||||
|
||||
return content
|
||||
|
||||
|
@@ -118,8 +119,12 @@ def html_to_text(page_source: str) -> str:
|
|||
return text
|
||||
|
||||
|
||||
def slimdown_html(page_source: str) -> str:
|
||||
soup = BeautifulSoup(page_source, "html.parser")
|
||||
# ...
|
||||
|
||||
def html_to_markdown(page_source: str) -> str:
|
||||
return pypandoc.convert_text(page_source, 'md', format='html')
|
||||
return pypandoc.convert_text(page_source, 'markdown', format='html')
|
||||
|
||||
def main(url):
|
||||
scraper = Scraper()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue