Lazily import litellm to shave >1sec off the initial load time of aider

Paul Gauthier 2024-07-03 12:45:53 -03:00
parent e5e2535f59
commit ee203deef0
3 changed files with 111 additions and 32 deletions
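The ">1sec" figure is easy to sanity-check locally. A rough timing snippet along these lines (illustrative only; numbers vary by machine and installed litellm version) shows where the startup time was going:

import time

start = time.perf_counter()
import litellm  # the eager, module-level import this commit defers
print(f"import litellm: {time.perf_counter() - start:.2f}s")

For a per-module breakdown, `python -X importtime -c "import litellm"` reports the same cost in more detail.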

View file

@@ -6,9 +6,23 @@ warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
 os.environ["OR_SITE_URL"] = "http://aider.chat"
 os.environ["OR_APP_NAME"] = "Aider"
 
-import litellm  # noqa: E402
+# `import litellm` takes 1.5 seconds, defer it!
 
-litellm.suppress_debug_info = True
-litellm.set_verbose = False
+
+class LazyLiteLLM:
+    def __init__(self):
+        self._lazy_module = None
+
+    def __getattr__(self, name):
+        if self._lazy_module is None:
+            self._lazy_module = __import__("litellm")
+
+            self._lazy_module.suppress_debug_info = True
+            self._lazy_module.set_verbose = False
+
+        return getattr(self._lazy_module, name)
+
+
+litellm = LazyLiteLLM()
+
+__all__ = [litellm]
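The stand-in object works because __getattr__ is only invoked for attributes that are not set on the proxy itself: the first attribute access pays the import cost, and later accesses are simply forwarded to the cached module. A minimal, self-contained sketch of the same pattern (illustrative only; "slow_module" is a hypothetical package name, not part of aider):

import importlib


class LazyModule:
    # Stand-in that imports the real module only when it is first used.
    def __init__(self, module_name):
        self._module_name = module_name
        self._module = None

    def __getattr__(self, name):
        # Runs on every attribute access that misses the proxy's own
        # attributes; the real import happens only once.
        if self._module is None:
            self._module = importlib.import_module(self._module_name)
        return getattr(self._module, name)


slow_module = LazyModule("slow_module")
# No import cost has been paid yet; the first attribute access would trigger it:
# slow_module.do_something()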

View file

@@ -15,6 +15,44 @@ from aider.litellm import litellm
 DEFAULT_MODEL_NAME = "gpt-4o"
 
+OPENAI_MODELS = """
+gpt-4
+gpt-4o
+gpt-4o-2024-05-13
+gpt-4-turbo-preview
+gpt-4-0314
+gpt-4-0613
+gpt-4-32k
+gpt-4-32k-0314
+gpt-4-32k-0613
+gpt-4-turbo
+gpt-4-turbo-2024-04-09
+gpt-4-1106-preview
+gpt-4-0125-preview
+gpt-4-vision-preview
+gpt-4-1106-vision-preview
+gpt-3.5-turbo
+gpt-3.5-turbo-0301
+gpt-3.5-turbo-0613
+gpt-3.5-turbo-1106
+gpt-3.5-turbo-0125
+gpt-3.5-turbo-16k
+gpt-3.5-turbo-16k-0613
+"""
+OPENAI_MODELS = [ln.strip() for ln in OPENAI_MODELS.splitlines() if ln.strip()]
+
+ANTHROPIC_MODELS = """
+claude-2
+claude-2.1
+claude-3-haiku-20240307
+claude-3-opus-20240229
+claude-3-sonnet-20240229
+claude-3-5-sonnet-20240620
+"""
+ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.strip()]
+
 
 @dataclass
 class ModelSettings:
@@ -491,7 +529,25 @@ class Model:
         with Image.open(fname) as img:
             return img.size
 
+    def fast_validate_environment(self):
+        """Fast path for common models. Avoids forcing litellm import."""
+        model = self.name
+        if model in OPENAI_MODELS:
+            var = "OPENAI_API_KEY"
+        elif model in ANTHROPIC_MODELS:
+            var = "ANTHROPIC_API_KEY"
+        else:
+            return
+
+        if os.environ.get(var):
+            return dict(keys_in_environment=[var], missing_keys=[])
+
     def validate_environment(self):
+        res = self.fast_validate_environment()
+        if res:
+            return res
+
         # https://github.com/BerriAI/litellm/issues/3190
         model = self.name
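Together the two methods give validate_environment a two-tier shape: a cheap membership-and-env-var check that never touches the lazy proxy, and a fallback that does (and therefore triggers the real import). Condensed into a standalone sketch (illustrative only; the model lists are trimmed, and the slow path is assumed to fall back to litellm's own validate_environment helper):

import os

OPENAI_MODELS = {"gpt-4o", "gpt-4", "gpt-3.5-turbo"}  # trimmed for brevity
ANTHROPIC_MODELS = {"claude-3-5-sonnet-20240620", "claude-3-opus-20240229"}


def validate_environment(model_name):
    # Fast path: set membership plus an os.environ lookup, no litellm needed.
    if model_name in OPENAI_MODELS:
        var = "OPENAI_API_KEY"
    elif model_name in ANTHROPIC_MODELS:
        var = "ANTHROPIC_API_KEY"
    else:
        var = None

    if var and os.environ.get(var):
        return dict(keys_in_environment=[var], missing_keys=[])

    # Slow path: touching the proxy here triggers the real litellm import.
    from aider.litellm import litellm

    return litellm.validate_environment(model_name)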

View file

@@ -15,40 +15,49 @@ CACHE = None
 # CACHE = Cache(CACHE_PATH)
 
 
-def should_giveup(e):
-    if not hasattr(e, "status_code"):
-        return False
+def lazy_litellm_retry_decorator(func):
+    def wrapper(*args, **kwargs):
+        def should_giveup(e):
+            if not hasattr(e, "status_code"):
+                return False
 
-    if type(e) in (
-        httpx.ConnectError,
-        httpx.RemoteProtocolError,
-        httpx.ReadTimeout,
-    ):
-        return False
+            if type(e) in (
+                httpx.ConnectError,
+                httpx.RemoteProtocolError,
+                httpx.ReadTimeout,
+            ):
+                return False
 
-    return not litellm._should_retry(e.status_code)
+            return not litellm._should_retry(e.status_code)
 
+        decorated_func = backoff.on_exception(
+            backoff.expo,
+            (
+                httpx.ConnectError,
+                httpx.RemoteProtocolError,
+                httpx.ReadTimeout,
+                litellm.exceptions.APIConnectionError,
+                litellm.exceptions.APIError,
+                litellm.exceptions.RateLimitError,
+                litellm.exceptions.ServiceUnavailableError,
+                litellm.exceptions.Timeout,
+                litellm.llms.anthropic.AnthropicError,
+            ),
+            giveup=should_giveup,
+            max_time=60,
+            on_backoff=lambda details: print(
+                f"{details.get('exception','Exception')}\nRetry in {details['wait']:.1f} seconds."
+            ),
+        )(func)
+        return decorated_func(*args, **kwargs)
+
+    return wrapper
 
-@backoff.on_exception(
-    backoff.expo,
-    (
-        httpx.ConnectError,
-        httpx.RemoteProtocolError,
-        httpx.ReadTimeout,
-        litellm.exceptions.APIConnectionError,
-        litellm.exceptions.APIError,
-        litellm.exceptions.RateLimitError,
-        litellm.exceptions.ServiceUnavailableError,
-        litellm.exceptions.Timeout,
-        litellm.llms.anthropic.AnthropicError,
-    ),
-    giveup=should_giveup,
-    max_time=60,
-    on_backoff=lambda details: print(
-        f"{details.get('exception','Exception')}\nRetry in {details['wait']:.1f} seconds."
-    ),
-)
+
+@lazy_litellm_retry_decorator
 def send_with_retries(model_name, messages, functions, stream, temperature=0):
+    from aider.litellm import litellm
     kwargs = dict(
         model=model_name,
         messages=messages,
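The reshuffling above is the heart of the change: decorator arguments are evaluated when a module is imported, so the old module-level @backoff.on_exception(...) read litellm.exceptions off the proxy during startup and forced the real import anyway. Building the backoff decorator inside wrapper moves that attribute access to the first actual call. A minimal sketch of the pattern, with stand-in exception classes instead of the litellm ones (illustrative only, not the aider code):

import functools

import backoff


def lazy_retry(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Anything expensive to resolve (such as exception classes living on a
        # lazily imported module) is looked up here, at call time, rather than
        # at module import time.
        retryable = (ConnectionError, TimeoutError)  # stand-ins for the litellm exceptions
        decorated = backoff.on_exception(backoff.expo, retryable, max_time=60)(func)
        return decorated(*args, **kwargs)

    return wrapper


@lazy_retry
def send(prompt):
    ...

The trade-off is that the backoff decorator is rebuilt on every call, which is negligible next to the cost of an actual LLM request.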