Configure model settings, adopt litellm bugfix

Paul Gauthier 2024-04-18 09:22:32 -07:00
parent f12bbf1e5b
commit 68888faa6f
4 changed files with 127 additions and 205 deletions

View file

@@ -570,6 +570,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
res = litellm.validate_environment(args.model)
# Is the model known and are all needed keys/params available?
missing_keys = res.get("missing_keys")
if missing_keys:
io.tool_error(f"To use model {args.model}, please set these environment variables:")
@@ -580,6 +581,14 @@
io.tool_error(f"Unknown model {args.model}.")
return 1
# Check in advance that we have model metadata
try:
litellm.get_model_info(args.model)
except Exception as err:
io.tool_error(f"Unknown model {args.model}.")
io.tool_error(str(err))
return 1
main_model = models.Model(args.model)
try:
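
The new hunk fails fast when litellm has no metadata for the requested model, instead of erroring later mid-chat. A minimal sketch of the behavior being relied on (not part of the diff; it assumes only that litellm is installed and that get_model_info() raises for unmapped models):

import litellm

def model_metadata_or_none(name):
    # get_model_info() returns a metadata dict (max_input_tokens,
    # per-token prices, etc.) for known models and raises an
    # Exception for unmapped ones -- the case the new try/except catches.
    try:
        return litellm.get_model_info(name)
    except Exception as err:
        print(f"Unknown model {name}: {err}")
        return None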

View file

@@ -1,5 +1,6 @@
import json
import math
from dataclasses import dataclass, fields
import litellm
from PIL import Image
@@ -7,35 +8,146 @@ from PIL import Image
from aider.dump import dump
@dataclass
class ModelSettings:
name: str
edit_format: str
weak_model_name: str = "gpt-3.5-turbo-0125"
use_repo_map: bool = False
send_undo_reply: bool = False
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
# https://platform.openai.com/docs/models/gpt-3-5-turbo
# https://openai.com/pricing
MODEL_SETTINGS = [
# gpt-3.5
ModelSettings(
"gpt-3.5-turbo-0125",
"whole",
),
ModelSettings(
"gpt-3.5-turbo-1106",
"whole",
),
ModelSettings(
"gpt-3.5-turbo-0613",
"whole",
),
ModelSettings(
"gpt-3.5-turbo-16k-0613",
"whole",
),
# gpt-4
ModelSettings(
"gpt-4-turbo-2024-04-09",
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelSettings(
"gpt-4-0125-preview",
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelSettings(
"gpt-4-1106-preview",
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelSettings(
"gpt-4-vision-preview",
"diff",
use_repo_map=True,
send_undo_reply=True,
),
ModelSettings(
"gpt-4-0613",
"diff",
use_repo_map=True,
send_undo_reply=True,
),
ModelSettings(
"gpt-4-32k-0613",
"diff",
use_repo_map=True,
send_undo_reply=True,
),
# Claude
ModelSettings(
"claude-3-opus-20240229",
"udiff",
weak_model_name="claude-3-haiku-20240307",
use_repo_map=True,
send_undo_reply=True,
),
]
ALIASES = {
# gpt-3.5
"gpt-3.5-turbo": "gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
# gpt-4
"gpt-4-turbo": "gpt-4-turbo-2024-04-09",
"gpt-4-turbo-preview": "gpt-4-0125-preview",
"gpt-4": "gpt-4-0613",
"gpt-4-32k": "gpt-4-32k-0613",
}
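# Illustration (not part of the commit; the resolution site is outside
# this hunk): a table like ALIASES is normally applied with a plain dict
# lookup that falls back to the given name unchanged, e.g.
#
#     pinned = ALIASES.get("gpt-4-turbo", "gpt-4-turbo")  # -> "gpt-4-turbo-2024-04-09"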
class Model:
name = None
    edit_format = "whole"
    weak_model_name = "gpt-3.5-turbo-0125"
use_repo_map = False
send_undo_reply = False
max_chat_history_tokens = 1024
def __init__(self, model):
self.name = model
self.info = litellm.get_model_info(model)
        dump(model, self.info)
if self.info.get("max_input_tokens", 0) < 32 * 1024:
self.max_chat_history_tokens = 1024
else:
self.max_chat_history_tokens = 2 * 1024
        self.configure_model_settings(model)
def configure_model_settings(self, model):
for ms in MODEL_SETTINGS:
# direct match, or match "provider/<model>"
if model == ms.name or model.endswith("/" + ms.name):
for field in fields(ModelSettings):
val = getattr(ms, field.name)
setattr(self, field.name, val)
return # <--
if "gpt-4" in model or "claude-2" in model:
self.edit_format = "diff"
self.use_repo_map = True
self.send_undo_reply = True
return # <--
# use the defaults
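        # Illustration (not part of the commit): the endswith("/" + ms.name)
        # branch above lets provider-prefixed names reuse the same settings,
        # e.g.
        #     "anthropic/claude-3-opus-20240229".endswith("/claude-3-opus-20240229")  # True
        # while names that match no entry and contain neither "gpt-4" nor
        # "claude-2" fall through to the class-level defaults.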
def __str__(self):
return self.name
def weak_model(self):
model = "gpt-3.5-turbo-0125"
if self.name == model:
if self.name == self.weak_model_name:
return self
return Model(model)
return Model(self.weak_model_name)
def commit_message_models(self):
return [self.weak_model()]
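
A usage sketch of the consolidated class (not part of the diff; it assumes the new module is importable as aider.models and that litellm has metadata for the names used):

from aider.models import Model

m = Model("gpt-4-turbo-2024-04-09")
print(m.edit_format)          # "udiff", from the matching ModelSettings entry
print(m.use_repo_map)         # True
print(m.weak_model().name)    # "gpt-3.5-turbo-0125", the default weak model

opus = Model("anthropic/claude-3-opus-20240229")
print(opus.weak_model().name) # "claude-3-haiku-20240307", per its settings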

View file

@@ -1,159 +0,0 @@
from dataclasses import dataclass, fields
import tiktoken
from aider.dump import dump # noqa: F401
from .model import Model
@dataclass
class ModelInfo:
name: str
max_context_tokens: int
prompt_price: float
completion_price: float
edit_format: str
always_available: bool = False
use_repo_map: bool = False
send_undo_reply: bool = False
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
# https://platform.openai.com/docs/models/gpt-3-5-turbo
# https://openai.com/pricing
openai_models = [
# gpt-3.5
ModelInfo(
"gpt-3.5-turbo-0125",
16385,
0.0005,
0.0015,
"whole",
always_available=True,
),
ModelInfo(
"gpt-3.5-turbo-1106",
16385,
0.0010,
0.0020,
"whole",
always_available=True,
),
ModelInfo(
"gpt-3.5-turbo-0613",
4096,
0.0015,
0.0020,
"whole",
always_available=True,
),
ModelInfo(
"gpt-3.5-turbo-16k-0613",
16385,
0.0030,
0.0040,
"whole",
always_available=True,
),
# gpt-4
ModelInfo(
"gpt-4-turbo-2024-04-09",
128000,
0.01,
0.03,
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-0125-preview",
128000,
0.01,
0.03,
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-1106-preview",
128000,
0.01,
0.03,
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-vision-preview",
128000,
0.01,
0.03,
"diff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-0613",
8192,
0.03,
0.06,
"diff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-32k-0613",
32768,
0.06,
0.12,
"diff",
use_repo_map=True,
send_undo_reply=True,
),
]
openai_aliases = {
# gpt-3.5
"gpt-3.5-turbo": "gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
# gpt-4
"gpt-4-turbo": "gpt-4-turbo-2024-04-09",
"gpt-4-turbo-preview": "gpt-4-0125-preview",
"gpt-4": "gpt-4-0613",
"gpt-4-32k": "gpt-4-32k-0613",
}
class OpenAIModel(Model):
def __init__(self, name):
true_name = openai_aliases.get(name, name)
try:
self.tokenizer = tiktoken.encoding_for_model(true_name)
except KeyError:
self.tokenizer = None
# raise ValueError(f"No known tokenizer for model: {name}")
model_info = self.lookup_model_info(true_name)
if not model_info:
raise ValueError(f"Unsupported model: {name}")
for field in fields(ModelInfo):
val = getattr(model_info, field.name)
setattr(self, field.name, val)
# restore the caller's specified name
self.name = name
# set the history token limit
if self.max_context_tokens < 32 * 1024:
self.max_chat_history_tokens = 1024
else:
self.max_chat_history_tokens = 2 * 1024
def lookup_model_info(self, name):
for mi in openai_models:
if mi.name == name:
return mi

View file

@@ -1,40 +0,0 @@
import tiktoken
from .model import Model
cached_model_details = None
class OpenRouterModel(Model):
def __init__(self, client, name):
if name.startswith("gpt-4") or name.startswith("gpt-3.5-turbo"):
name = "openai/" + name
self.name = name
self.edit_format = edit_format_for_model(name)
self.use_repo_map = self.edit_format == "diff"
# TODO: figure out proper encodings for non openai models
self.tokenizer = tiktoken.get_encoding("cl100k_base")
global cached_model_details
if cached_model_details is None:
cached_model_details = client.models.list().data
found = next(
(details for details in cached_model_details if details.id == name), None
)
if found:
self.max_context_tokens = int(found.context_length)
self.prompt_price = round(float(found.pricing.get("prompt")) * 1000, 6)
self.completion_price = round(float(found.pricing.get("completion")) * 1000, 6)
else:
raise ValueError(f"invalid openrouter model: {name}")
def edit_format_for_model(name):
if any(str in name for str in ["gpt-4", "claude-2"]):
return "diff"
return "whole"