Rationalized openai model attributes #458

Paul Gauthier 2024-02-07 12:07:20 -08:00
parent 873a388693
commit 35f812859e
5 changed files with 151 additions and 91 deletions

aider/coders/base_coder.py

@@ -72,12 +72,11 @@ class Coder:
         if not skip_model_availabily_check and not main_model.always_available:
             if not check_model_availability(io, client, main_model):
-                fallback_model = models.GPT35_1106
-                if main_model != models.GPT4:
-                    io.tool_error(
-                        f"API key does not support {main_model.name}, falling back to"
-                        f" {fallback_model.name}"
-                    )
+                fallback_model = models.GPT35_0125
+                io.tool_error(
+                    f"API key does not support {main_model.name}, falling back to"
+                    f" {fallback_model.name}"
+                )
                 main_model = fallback_model
 
         if edit_format is None:
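
Behaviorally, this hunk means the fallback warning is no longer suppressed when gpt-4 is the requested model, and the fallback becomes gpt-3.5-turbo-0125. A self-contained sketch of the resulting flow; FakeModel, FakeIO, and choose_model are illustrative stand-ins, not aider's real API:

from dataclasses import dataclass

@dataclass
class FakeModel:
    name: str
    always_available: bool = False

class FakeIO:
    def tool_error(self, msg):
        print(f"ERROR: {msg}")

def choose_model(io, requested, available_names, fallback):
    # the warning now fires for every unavailable model, gpt-4 included
    if not requested.always_available and requested.name not in available_names:
        io.tool_error(
            f"API key does not support {requested.name}, falling back to"
            f" {fallback.name}"
        )
        return fallback
    return requested

main_model = choose_model(
    FakeIO(), FakeModel("gpt-4"), {"gpt-3.5-turbo-0125"}, FakeModel("gpt-3.5-turbo-0125")
)
print(main_model.name)  # gpt-3.5-turbo-0125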

aider/main.py

@@ -149,7 +149,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
         "--model",
         metavar="MODEL",
         default=models.GPT4_0613.name,
-        help=f"Specify the model to use for the main chat (default: {models.GPT4.name})",
+        help=f"Specify the model to use for the main chat (default: {models.GPT4_0613.name})",
     )
     core_group.add_argument(
         "--skip-model-availability-check",
@@ -167,7 +167,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
         const=default_4_turbo_model,
         help=f"Use {default_4_turbo_model} model for the main chat (gpt-4 is better)",
     )
-    default_3_model = models.GPT35_1106
+    default_3_model = models.GPT35_0125
     core_group.add_argument(
         "-3",
         action="store_const",

aider/models/__init__.py

@@ -5,13 +5,12 @@ from .openrouter import OpenRouterModel
 GPT4 = Model.create("gpt-4")
 GPT4_0613 = Model.create("gpt-4-0613")
 GPT35 = Model.create("gpt-3.5-turbo")
-GPT35_1106 = Model.create("gpt-3.5-turbo-1106")
-GPT35_16k = Model.create("gpt-3.5-turbo-16k")
+GPT35_0125 = Model.create("gpt-3.5-turbo-0125")
 
 __all__ = [
     OpenAIModel,
     OpenRouterModel,
     GPT4,
     GPT35,
-    GPT35_16k,
+    GPT35_0125,
 ]
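
Illustrative usage of the re-exported constants, assuming aider at this commit is importable; the attribute values come from the ModelInfo table in models/openai.py below:

from aider import models

print(models.GPT35_0125.name)                # gpt-3.5-turbo-0125
print(models.GPT35_0125.max_context_tokens)  # 16385
print(models.GPT35_0125.always_available)    # True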

aider/models/openai.py

@@ -1,87 +1,148 @@
-import re
+from dataclasses import dataclass, fields
 
 import tiktoken
 
+from aider.dump import dump
+
 from .model import Model
 
-known_tokens = {
-    "gpt-3.5-turbo": 4,
-    "gpt-4": 8,
-    "gpt-4-1106-preview": 128,
-    "gpt-4-0125-preview": 128,
-    "gpt-4-turbo-preview": 128,
-    "gpt-3.5-turbo-1106": 16,
-}
+
+@dataclass
+class ModelInfo:
+    name: str
+    max_context_tokens: int
+    prompt_price: float
+    completions_price: float
+    edit_format: str
+    always_available: bool = False
+    use_repo_map: bool = False
+    send_undo_reply: bool = False
+
+
+openai_models = [
+    # gpt-3.5
+    ModelInfo(
+        "gpt-3.5-turbo-0125",
+        16385,
+        0.0005,
+        0.0015,
+        "whole",
+        always_available=True,
+    ),
+    ModelInfo(
+        "gpt-3.5-turbo-1106",
+        16385,
+        0.0010,
+        0.0020,
+        "whole",
+        always_available=True,
+    ),
+    ModelInfo(
+        "gpt-3.5-turbo-0613",
+        4096,
+        0.0015,
+        0.0020,
+        "whole",
+        always_available=True,
+    ),
+    ModelInfo(
+        "gpt-3.5-turbo-16k-0613",
+        16385,
+        0.0030,
+        0.0040,
+        "whole",
+        always_available=True,
+    ),
+    # gpt-4
+    ModelInfo(
+        "gpt-4-0125-preview",
+        128000,
+        0.01,
+        0.03,
+        "udiff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-1106-preview",
+        128000,
+        0.01,
+        0.03,
+        "udiff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-vision-preview",
+        128000,
+        0.01,
+        0.03,
+        "diff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-0613",
+        8192,
+        0.03,
+        0.06,
+        "diff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-32k-0613",
+        32768,
+        0.06,
+        0.12,
+        "diff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+]
+
+openai_aliases = {
+    # gpt-3.5
+    "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
+    "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
+    # gpt-4
+    "gpt-4-turbo-preview": "gpt-4-0125-preview",
+    "gpt-4": "gpt-4-0613",
+    "gpt-4-32k": "gpt-4-32k-0613",
+}
 
 
 class OpenAIModel(Model):
     def __init__(self, name):
+        true_name = openai_aliases.get(name, name)
+
+        try:
+            self.tokenizer = tiktoken.encoding_for_model(true_name)
+        except KeyError:
+            raise ValueError(f"No known tokenizer for model: {name}")
+
+        model_info = self.lookup_model_info(true_name)
+        if not model_info:
+            raise ValueError(f"Unsupported model: {name}")
+
+        print()
+        dump(name)
+        dump(true_name)
+        for field in fields(ModelInfo):
+            val = getattr(model_info, field.name)
+            setattr(self, field.name, val)
+            dump(field.name, val)
+
+        # restore the caller's specified name
+        self.name = name
-        tokens = None
-
-        match = re.search(r"-([0-9]+)k", name)
-        if match:
-            tokens = int(match.group(1))
+
+        # set the history token limit
+        if self.max_context_tokens < 32 * 1024:
+            self.max_chat_history_tokens = 1024
         else:
-            for m, t in known_tokens.items():
-                if name.startswith(m):
-                    tokens = t
+            self.max_chat_history_tokens = 2 * 1024
-
-        if tokens is None:
-            raise ValueError(f"Unknown context window size for model: {name}")
-
-        self.max_context_tokens = tokens * 1024
-        self.tokenizer = tiktoken.encoding_for_model(name)
-
-        if self.is_gpt4():
-            if name in ("gpt-4-1106-preview", "gpt-4-0125-preview", "gpt-4-turbo-preview"):
-                self.edit_format = "udiff"
-            else:
-                self.edit_format = "diff"
-            self.use_repo_map = True
-            self.send_undo_reply = True
-
-            if tokens == 8:
-                self.prompt_price = 0.03
-                self.completion_price = 0.06
-                self.max_chat_history_tokens = 1024
-            elif tokens == 32:
-                self.prompt_price = 0.06
-                self.completion_price = 0.12
-                self.max_chat_history_tokens = 2 * 1024
-            elif tokens == 128:
-                self.prompt_price = 0.01
-                self.completion_price = 0.03
-                self.max_chat_history_tokens = 2 * 1024
-
-            return
-
-        if self.is_gpt35():
-            self.edit_format = "whole"
-            self.always_available = True
-            self.send_undo_reply = False
-
-            if self.name == "gpt-3.5-turbo-1106":
-                self.prompt_price = 0.001
-                self.completion_price = 0.002
-                self.max_chat_history_tokens = 2 * 1024
-            elif tokens == 4:
-                self.prompt_price = 0.0015
-                self.completion_price = 0.002
-                self.max_chat_history_tokens = 1024
-            elif tokens == 16:
-                self.prompt_price = 0.003
-                self.completion_price = 0.004
-                self.max_chat_history_tokens = 2 * 1024
-
-            return
-
-        raise ValueError(f"Unsupported model: {name}")
-
-    def is_gpt4(self):
-        return self.name.startswith("gpt-4")
-
-    def is_gpt35(self):
-        return self.name.startswith("gpt-3.5-turbo")
+
+    def lookup_model_info(self, name):
+        for mi in openai_models:
+            if mi.name == name:
+                return mi
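
The heart of the rewrite is a data-driven lookup: resolve an alias to a dated model name, find its ModelInfo, then mirror every dataclass field onto the model instance with fields() and setattr(). A standalone sketch of that mechanism with an abridged two-entry registry; SketchModel is a local stand-in, not aider's Model base class:

from dataclasses import dataclass, fields

@dataclass
class ModelInfo:
    name: str
    max_context_tokens: int
    prompt_price: float
    completions_price: float
    edit_format: str
    always_available: bool = False
    use_repo_map: bool = False
    send_undo_reply: bool = False

# abridged registry; the commit defines nine entries
openai_models = [
    ModelInfo("gpt-3.5-turbo-0125", 16385, 0.0005, 0.0015, "whole", always_available=True),
    ModelInfo("gpt-4-0613", 8192, 0.03, 0.06, "diff", use_repo_map=True, send_undo_reply=True),
]
openai_aliases = {"gpt-4": "gpt-4-0613"}

class SketchModel:
    def __init__(self, name):
        true_name = openai_aliases.get(name, name)
        info = next((mi for mi in openai_models if mi.name == true_name), None)
        if info is None:
            raise ValueError(f"Unsupported model: {name}")
        # mirror every ModelInfo field onto this instance
        for field in fields(ModelInfo):
            setattr(self, field.name, getattr(info, field.name))
        self.name = name  # restore the caller's spelling, e.g. the alias
        # history limit keyed off the context window, as in the commit
        self.max_chat_history_tokens = 1024 if self.max_context_tokens < 32 * 1024 else 2 * 1024

m = SketchModel("gpt-4")
print(m.name, m.edit_format, m.max_context_tokens)  # gpt-4 diff 8192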

tests/test_models.py

@@ -10,10 +10,10 @@ class TestModels(unittest.TestCase):
         self.assertEqual(model.max_context_tokens, 4 * 1024)
 
         model = Model.create("gpt-3.5-turbo-16k")
-        self.assertEqual(model.max_context_tokens, 16 * 1024)
+        self.assertEqual(model.max_context_tokens, 16385)
 
         model = Model.create("gpt-3.5-turbo-1106")
-        self.assertEqual(model.max_context_tokens, 16 * 1024)
+        self.assertEqual(model.max_context_tokens, 16385)
 
         model = Model.create("gpt-4")
         self.assertEqual(model.max_context_tokens, 8 * 1024)
@@ -21,14 +21,12 @@ class TestModels(unittest.TestCase):
         model = Model.create("gpt-4-32k")
         self.assertEqual(model.max_context_tokens, 32 * 1024)
 
-        model = Model.create("gpt-4-0101")
+        model = Model.create("gpt-4-0613")
         self.assertEqual(model.max_context_tokens, 8 * 1024)
 
-        model = Model.create("gpt-4-32k-2123")
-        self.assertEqual(model.max_context_tokens, 32 * 1024)
-
     def test_openrouter_model_properties(self):
         client = MagicMock()
 
         class ModelData:
             def __init__(self, id, object, context_length, pricing):
                 self.id = id
@@ -36,7 +34,10 @@ class TestModels(unittest.TestCase):
                 self.context_length = context_length
                 self.pricing = pricing
 
-        model_data = ModelData("openai/gpt-4", "model", "8192", {"prompt": "0.00006", "completion": "0.00012"})
+        model_data = ModelData(
+            "openai/gpt-4", "model", "8192", {"prompt": "0.00006", "completion": "0.00012"}
+        )
 
         class ModelList:
             def __init__(self, data):
                 self.data = data
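
A side note on the changed assertions: 16 * 1024 is 16384, while OpenAI documents these gpt-3.5 context windows as 16,385 tokens, so the old expected values were off by one:

old_expected = 16 * 1024  # 16384, the rounded guess the tests used
new_expected = 16385      # the documented context window
print(new_expected - old_expected)  # 1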