defer numpy, bs4 and jsonschema

This commit is contained in:
Paul Gauthier 2024-07-03 13:35:33 -03:00
parent 2dc6735ab4
commit ed35af44b3
3 changed files with 9 additions and 25 deletions

View file

@ -13,7 +13,6 @@ from json.decoder import JSONDecodeError
from pathlib import Path from pathlib import Path
import git import git
from jsonschema import Draft7Validator
from rich.console import Console, Text from rich.console import Console, Text
from rich.markdown import Markdown from rich.markdown import Markdown
@ -346,6 +345,8 @@ class Coder:
# validate the functions jsonschema # validate the functions jsonschema
if self.functions: if self.functions:
from jsonschema import Draft7Validator
for function in self.functions: for function in self.functions:
Draft7Validator.check_schema(function) Draft7Validator.check_schema(function)

View file

@ -5,7 +5,6 @@ import sys
import playwright import playwright
import pypandoc import pypandoc
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
from aider import __version__, urls from aider import __version__, urls
@ -58,7 +57,6 @@ class Scraper:
self.try_pandoc() self.try_pandoc()
content = self.html_to_markdown(content) content = self.html_to_markdown(content)
# content = html_to_text(content)
return content return content
@ -139,6 +137,8 @@ class Scraper:
self.pandoc_available = True self.pandoc_available = True
def html_to_markdown(self, page_source): def html_to_markdown(self, page_source):
from bs4 import BeautifulSoup
soup = BeautifulSoup(page_source, "html.parser") soup = BeautifulSoup(page_source, "html.parser")
soup = slimdown_html(soup) soup = slimdown_html(soup)
page_source = str(soup) page_source = str(soup)
@ -174,24 +174,6 @@ def slimdown_html(soup):
return soup return soup
# Adapted from AutoGPT, MIT License
#
# https://github.com/Significant-Gravitas/AutoGPT/blob/fe0923ba6c9abb42ac4df79da580e8a4391e0418/autogpts/autogpt/autogpt/commands/web_selenium.py#L173
def html_to_text(page_source: str) -> str:
    """Return the visible text of an HTML document, one chunk per line.

    Script and style elements are removed before the text is extracted.
    Each line of the extracted text is stripped, split into phrases on
    double spaces, and the non-empty phrases are joined with newlines.

    Args:
        page_source: The raw HTML markup to convert.

    Returns:
        The extracted text with blank chunks removed.
    """
    # Imported lazily, like html_to_markdown does, so that bs4 is only
    # loaded when HTML actually needs to be parsed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(page_source, "html.parser")

    # Remove script/style elements so their contents do not leak into the text.
    for element in soup(["script", "style"]):
        element.extract()

    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    # Split on a double space: wide gaps separate phrases, while ordinary
    # single-spaced sentences stay together on one line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    return "\n".join(chunk for chunk in chunks if chunk)
def main(url): def main(url):
scraper = Scraper() scraper = Scraper()
content = scraper.scrape(url) content = scraper.scrape(url)

View file

@ -1,10 +1,9 @@
import math
import os import os
import queue import queue
import tempfile import tempfile
import time import time
import numpy as np
from aider.litellm import litellm from aider.litellm import litellm
try: try:
@ -41,6 +40,8 @@ class Voice:
def callback(self, indata, frames, time, status): def callback(self, indata, frames, time, status):
"""This is called (from a separate thread) for each audio block.""" """This is called (from a separate thread) for each audio block."""
import numpy as np
rms = np.sqrt(np.mean(indata**2)) rms = np.sqrt(np.mean(indata**2))
self.max_rms = max(self.max_rms, rms) self.max_rms = max(self.max_rms, rms)
self.min_rms = min(self.min_rms, rms) self.min_rms = min(self.min_rms, rms)
@ -55,7 +56,7 @@ class Voice:
def get_prompt(self): def get_prompt(self):
num = 10 num = 10
if np.isnan(self.pct) or self.pct < self.threshold: if math.isnan(self.pct) or self.pct < self.threshold:
cnt = 0 cnt = 0
else: else:
cnt = int(self.pct * 10) cnt = int(self.pct * 10)
@ -78,7 +79,7 @@ class Voice:
filename = tempfile.mktemp(suffix=".wav") filename = tempfile.mktemp(suffix=".wav")
try: try:
sample_rate = int(self.sd.query_devices(None, 'input')['default_samplerate']) sample_rate = int(self.sd.query_devices(None, "input")["default_samplerate"])
except (TypeError, ValueError): except (TypeError, ValueError):
sample_rate = 16000 # fallback to 16kHz if unable to query device sample_rate = 16000 # fallback to 16kHz if unable to query device