Rationalized openai model attributes #458

This commit is contained in:
Paul Gauthier 2024-02-07 12:07:20 -08:00
parent 873a388693
commit 35f812859e
5 changed files with 151 additions and 91 deletions

View file

@@ -72,12 +72,11 @@ class Coder:
if not skip_model_availabily_check and not main_model.always_available: if not skip_model_availabily_check and not main_model.always_available:
if not check_model_availability(io, client, main_model): if not check_model_availability(io, client, main_model):
fallback_model = models.GPT35_1106 fallback_model = models.GPT35_0125
if main_model != models.GPT4: io.tool_error(
io.tool_error( f"API key does not support {main_model.name}, falling back to"
f"API key does not support {main_model.name}, falling back to" f" {fallback_model.name}"
f" {fallback_model.name}" )
)
main_model = fallback_model main_model = fallback_model
if edit_format is None: if edit_format is None:

View file

@@ -149,7 +149,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
"--model", "--model",
metavar="MODEL", metavar="MODEL",
default=models.GPT4_0613.name, default=models.GPT4_0613.name,
help=f"Specify the model to use for the main chat (default: {models.GPT4.name})", help=f"Specify the model to use for the main chat (default: {models.GPT4_0613.name})",
) )
core_group.add_argument( core_group.add_argument(
"--skip-model-availability-check", "--skip-model-availability-check",
@@ -167,7 +167,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
const=default_4_turbo_model, const=default_4_turbo_model,
help=f"Use {default_4_turbo_model} model for the main chat (gpt-4 is better)", help=f"Use {default_4_turbo_model} model for the main chat (gpt-4 is better)",
) )
default_3_model = models.GPT35_1106 default_3_model = models.GPT35_0125
core_group.add_argument( core_group.add_argument(
"-3", "-3",
action="store_const", action="store_const",

View file

@@ -5,13 +5,12 @@ from .openrouter import OpenRouterModel
GPT4 = Model.create("gpt-4") GPT4 = Model.create("gpt-4")
GPT4_0613 = Model.create("gpt-4-0613") GPT4_0613 = Model.create("gpt-4-0613")
GPT35 = Model.create("gpt-3.5-turbo") GPT35 = Model.create("gpt-3.5-turbo")
GPT35_1106 = Model.create("gpt-3.5-turbo-1106") GPT35_0125 = Model.create("gpt-3.5-turbo-0125")
GPT35_16k = Model.create("gpt-3.5-turbo-16k")
__all__ = [ __all__ = [
OpenAIModel, OpenAIModel,
OpenRouterModel, OpenRouterModel,
GPT4, GPT4,
GPT35, GPT35,
GPT35_16k, GPT35_0125,
] ]

View file

@@ -1,87 +1,148 @@
import re from dataclasses import dataclass, fields
import tiktoken import tiktoken
from aider.dump import dump
from .model import Model from .model import Model
known_tokens = {
"gpt-3.5-turbo": 4, @dataclass
"gpt-4": 8, class ModelInfo:
"gpt-4-1106-preview": 128, name: str
"gpt-4-0125-preview": 128, max_context_tokens: int
"gpt-4-turbo-preview": 128, prompt_price: float
"gpt-3.5-turbo-1106": 16, completions_price: float
edit_format: str
always_available: bool = False
use_repo_map: bool = False
send_undo_reply: bool = False
openai_models = [
# gpt-3.5
ModelInfo(
"gpt-3.5-turbo-0125",
16385,
0.0005,
0.0015,
"whole",
always_available=True,
),
ModelInfo(
"gpt-3.5-turbo-1106",
16385,
0.0010,
0.0020,
"whole",
always_available=True,
),
ModelInfo(
"gpt-3.5-turbo-0613",
4096,
0.0015,
0.0020,
"whole",
always_available=True,
),
ModelInfo(
"gpt-3.5-turbo-16k-0613",
16385,
0.0030,
0.0040,
"whole",
always_available=True,
),
# gpt-4
ModelInfo(
"gpt-4-0125-preview",
128000,
0.01,
0.03,
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-1106-preview",
128000,
0.01,
0.03,
"udiff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-vision-preview",
128000,
0.01,
0.03,
"diff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-0613",
8192,
0.03,
0.06,
"diff",
use_repo_map=True,
send_undo_reply=True,
),
ModelInfo(
"gpt-4-32k-0613",
32768,
0.06,
0.12,
"diff",
use_repo_map=True,
send_undo_reply=True,
),
]
openai_aliases = {
# gpt-3.5
"gpt-3.5-turbo": "gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
# gpt-4
"gpt-4-turbo-preview": "gpt-4-0125-preview",
"gpt-4": "gpt-4-0613",
"gpt-4-32k": "gpt-4-32k-0613",
} }
class OpenAIModel(Model): class OpenAIModel(Model):
def __init__(self, name): def __init__(self, name):
true_name = openai_aliases.get(name, name)
try:
self.tokenizer = tiktoken.encoding_for_model(true_name)
except KeyError:
raise ValueError(f"No known tokenizer for model: {name}")
model_info = self.lookup_model_info(true_name)
if not model_info:
raise ValueError(f"Unsupported model: {name}")
print()
dump(name)
dump(true_name)
for field in fields(ModelInfo):
val = getattr(model_info, field.name)
setattr(self, field.name, val)
dump(field.name, val)
# restore the caller's specified name
self.name = name self.name = name
tokens = None # set the history token limit
if self.max_context_tokens < 32 * 1024:
match = re.search(r"-([0-9]+)k", name) self.max_chat_history_tokens = 1024
if match:
tokens = int(match.group(1))
else: else:
for m, t in known_tokens.items(): self.max_chat_history_tokens = 2 * 1024
if name.startswith(m):
tokens = t
if tokens is None: def lookup_model_info(self, name):
raise ValueError(f"Unknown context window size for model: {name}") for mi in openai_models:
if mi.name == name:
self.max_context_tokens = tokens * 1024 return mi
self.tokenizer = tiktoken.encoding_for_model(name)
if self.is_gpt4():
if name in ("gpt-4-1106-preview", "gpt-4-0125-preview", "gpt-4-turbo-preview"):
self.edit_format = "udiff"
else:
self.edit_format = "diff"
self.use_repo_map = True
self.send_undo_reply = True
if tokens == 8:
self.prompt_price = 0.03
self.completion_price = 0.06
self.max_chat_history_tokens = 1024
elif tokens == 32:
self.prompt_price = 0.06
self.completion_price = 0.12
self.max_chat_history_tokens = 2 * 1024
elif tokens == 128:
self.prompt_price = 0.01
self.completion_price = 0.03
self.max_chat_history_tokens = 2 * 1024
return
if self.is_gpt35():
self.edit_format = "whole"
self.always_available = True
self.send_undo_reply = False
if self.name == "gpt-3.5-turbo-1106":
self.prompt_price = 0.001
self.completion_price = 0.002
self.max_chat_history_tokens = 2 * 1024
elif tokens == 4:
self.prompt_price = 0.0015
self.completion_price = 0.002
self.max_chat_history_tokens = 1024
elif tokens == 16:
self.prompt_price = 0.003
self.completion_price = 0.004
self.max_chat_history_tokens = 2 * 1024
return
raise ValueError(f"Unsupported model: {name}")
def is_gpt4(self):
return self.name.startswith("gpt-4")
def is_gpt35(self):
return self.name.startswith("gpt-3.5-turbo")

View file

@@ -10,10 +10,10 @@ class TestModels(unittest.TestCase):
self.assertEqual(model.max_context_tokens, 4 * 1024) self.assertEqual(model.max_context_tokens, 4 * 1024)
model = Model.create("gpt-3.5-turbo-16k") model = Model.create("gpt-3.5-turbo-16k")
self.assertEqual(model.max_context_tokens, 16 * 1024) self.assertEqual(model.max_context_tokens, 16385)
model = Model.create("gpt-3.5-turbo-1106") model = Model.create("gpt-3.5-turbo-1106")
self.assertEqual(model.max_context_tokens, 16 * 1024) self.assertEqual(model.max_context_tokens, 16385)
model = Model.create("gpt-4") model = Model.create("gpt-4")
self.assertEqual(model.max_context_tokens, 8 * 1024) self.assertEqual(model.max_context_tokens, 8 * 1024)
@@ -21,14 +21,12 @@ class TestModels(unittest.TestCase):
model = Model.create("gpt-4-32k") model = Model.create("gpt-4-32k")
self.assertEqual(model.max_context_tokens, 32 * 1024) self.assertEqual(model.max_context_tokens, 32 * 1024)
model = Model.create("gpt-4-0101") model = Model.create("gpt-4-0613")
self.assertEqual(model.max_context_tokens, 8 * 1024) self.assertEqual(model.max_context_tokens, 8 * 1024)
model = Model.create("gpt-4-32k-2123")
self.assertEqual(model.max_context_tokens, 32 * 1024)
def test_openrouter_model_properties(self): def test_openrouter_model_properties(self):
client = MagicMock() client = MagicMock()
class ModelData: class ModelData:
def __init__(self, id, object, context_length, pricing): def __init__(self, id, object, context_length, pricing):
self.id = id self.id = id
@@ -36,7 +34,10 @@ class TestModels(unittest.TestCase):
self.context_length = context_length self.context_length = context_length
self.pricing = pricing self.pricing = pricing
model_data = ModelData("openai/gpt-4", "model", "8192", {"prompt": "0.00006", "completion": "0.00012"}) model_data = ModelData(
"openai/gpt-4", "model", "8192", {"prompt": "0.00006", "completion": "0.00012"}
)
class ModelList: class ModelList:
def __init__(self, data): def __init__(self, data):
self.data = data self.data = data