Rationalized openai model attributes #458

Paul Gauthier 2024-02-07 12:07:20 -08:00
parent 873a388693
commit 35f812859e
5 changed files with 151 additions and 91 deletions

aider/coders/base_coder.py

@@ -72,12 +72,11 @@ class Coder:
         if not skip_model_availabily_check and not main_model.always_available:
             if not check_model_availability(io, client, main_model):
-                fallback_model = models.GPT35_1106
-                if main_model != models.GPT4:
-                    io.tool_error(
-                        f"API key does not support {main_model.name}, falling back to"
-                        f" {fallback_model.name}"
-                    )
+                fallback_model = models.GPT35_0125
+                io.tool_error(
+                    f"API key does not support {main_model.name}, falling back to"
+                    f" {fallback_model.name}"
+                )
                 main_model = fallback_model
 
         if edit_format is None:
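
Behaviorally, this hunk means the fallback warning is no longer suppressed when gpt-4 is the requested model, and the fallback becomes gpt-3.5-turbo-0125. A self-contained sketch of the resulting flow; FakeModel, FakeIO, and choose_model are illustrative stand-ins, not aider's real API:

from dataclasses import dataclass

@dataclass
class FakeModel:
    name: str
    always_available: bool = False

class FakeIO:
    def tool_error(self, msg):
        print(f"ERROR: {msg}")

def choose_model(io, requested, available_names, fallback):
    # the warning now fires for every unavailable model, gpt-4 included
    if not requested.always_available and requested.name not in available_names:
        io.tool_error(
            f"API key does not support {requested.name}, falling back to"
            f" {fallback.name}"
        )
        return fallback
    return requested

main_model = choose_model(
    FakeIO(), FakeModel("gpt-4"), {"gpt-3.5-turbo-0125"}, FakeModel("gpt-3.5-turbo-0125")
)
print(main_model.name)  # gpt-3.5-turbo-0125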

aider/main.py

@@ -149,7 +149,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
         "--model",
         metavar="MODEL",
         default=models.GPT4_0613.name,
-        help=f"Specify the model to use for the main chat (default: {models.GPT4.name})",
+        help=f"Specify the model to use for the main chat (default: {models.GPT4_0613.name})",
     )
     core_group.add_argument(
         "--skip-model-availability-check",
@@ -167,7 +167,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
         const=default_4_turbo_model,
         help=f"Use {default_4_turbo_model} model for the main chat (gpt-4 is better)",
     )
-    default_3_model = models.GPT35_1106
+    default_3_model = models.GPT35_0125
     core_group.add_argument(
         "-3",
         action="store_const",

aider/models/__init__.py

@@ -5,13 +5,12 @@ from .openrouter import OpenRouterModel
 GPT4 = Model.create("gpt-4")
 GPT4_0613 = Model.create("gpt-4-0613")
 GPT35 = Model.create("gpt-3.5-turbo")
-GPT35_1106 = Model.create("gpt-3.5-turbo-1106")
-GPT35_16k = Model.create("gpt-3.5-turbo-16k")
+GPT35_0125 = Model.create("gpt-3.5-turbo-0125")
 
 __all__ = [
     OpenAIModel,
     OpenRouterModel,
     GPT4,
     GPT35,
-    GPT35_16k,
+    GPT35_0125,
 ]
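
Illustrative usage of the re-exported constants, assuming aider at this commit is importable; the attribute values come from the ModelInfo table in models/openai.py below:

from aider import models

print(models.GPT35_0125.name)                # gpt-3.5-turbo-0125
print(models.GPT35_0125.max_context_tokens)  # 16385
print(models.GPT35_0125.always_available)    # True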

aider/models/openai.py

@@ -1,87 +1,148 @@
-import re
+from dataclasses import dataclass, fields
 
 import tiktoken
 
+from aider.dump import dump
+
 from .model import Model
 
-known_tokens = {
-    "gpt-3.5-turbo": 4,
-    "gpt-4": 8,
-    "gpt-4-1106-preview": 128,
-    "gpt-4-0125-preview": 128,
-    "gpt-4-turbo-preview": 128,
-    "gpt-3.5-turbo-1106": 16,
-}
+
+@dataclass
+class ModelInfo:
+    name: str
+    max_context_tokens: int
+    prompt_price: float
+    completions_price: float
+    edit_format: str
+    always_available: bool = False
+    use_repo_map: bool = False
+    send_undo_reply: bool = False
+
+
+openai_models = [
+    # gpt-3.5
+    ModelInfo(
+        "gpt-3.5-turbo-0125",
+        16385,
+        0.0005,
+        0.0015,
+        "whole",
+        always_available=True,
+    ),
+    ModelInfo(
+        "gpt-3.5-turbo-1106",
+        16385,
+        0.0010,
+        0.0020,
+        "whole",
+        always_available=True,
+    ),
+    ModelInfo(
+        "gpt-3.5-turbo-0613",
+        4096,
+        0.0015,
+        0.0020,
+        "whole",
+        always_available=True,
+    ),
+    ModelInfo(
+        "gpt-3.5-turbo-16k-0613",
+        16385,
+        0.0030,
+        0.0040,
+        "whole",
+        always_available=True,
+    ),
+    # gpt-4
+    ModelInfo(
+        "gpt-4-0125-preview",
+        128000,
+        0.01,
+        0.03,
+        "udiff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-1106-preview",
+        128000,
+        0.01,
+        0.03,
+        "udiff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-vision-preview",
+        128000,
+        0.01,
+        0.03,
+        "diff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-0613",
+        8192,
+        0.03,
+        0.06,
+        "diff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+    ModelInfo(
+        "gpt-4-32k-0613",
+        32768,
+        0.06,
+        0.12,
+        "diff",
+        use_repo_map=True,
+        send_undo_reply=True,
+    ),
+]
+
+openai_aliases = {
+    # gpt-3.5
+    "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
+    "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
+    # gpt-4
+    "gpt-4-turbo-preview": "gpt-4-0125-preview",
+    "gpt-4": "gpt-4-0613",
+    "gpt-4-32k": "gpt-4-32k-0613",
+}
 
 
 class OpenAIModel(Model):
     def __init__(self, name):
+        true_name = openai_aliases.get(name, name)
+
+        try:
+            self.tokenizer = tiktoken.encoding_for_model(true_name)
+        except KeyError:
+            raise ValueError(f"No known tokenizer for model: {name}")
+
+        model_info = self.lookup_model_info(true_name)
+        if not model_info:
+            raise ValueError(f"Unsupported model: {name}")
+
+        print()
+        dump(name)
+        dump(true_name)
+        for field in fields(ModelInfo):
+            val = getattr(model_info, field.name)
+            setattr(self, field.name, val)
+            dump(field.name, val)
+
+        # restore the caller's specified name
+        self.name = name
-        tokens = None
-
-        match = re.search(r"-([0-9]+)k", name)
-        if match:
-            tokens = int(match.group(1))
+
+        # set the history token limit
+        if self.max_context_tokens < 32 * 1024:
+            self.max_chat_history_tokens = 1024
         else:
-            for m, t in known_tokens.items():
-                if name.startswith(m):
-                    tokens = t
+            self.max_chat_history_tokens = 2 * 1024
-
-        if tokens is None:
-            raise ValueError(f"Unknown context window size for model: {name}")
-
-        self.max_context_tokens = tokens * 1024
-        self.tokenizer = tiktoken.encoding_for_model(name)
-
-        if self.is_gpt4():
-            if name in ("gpt-4-1106-preview", "gpt-4-0125-preview", "gpt-4-turbo-preview"):
-                self.edit_format = "udiff"
-            else:
-                self.edit_format = "diff"
-            self.use_repo_map = True
-            self.send_undo_reply = True
-
-            if tokens == 8:
-                self.prompt_price = 0.03
-                self.completion_price = 0.06
-                self.max_chat_history_tokens = 1024
-            elif tokens == 32:
-                self.prompt_price = 0.06
-                self.completion_price = 0.12
-                self.max_chat_history_tokens = 2 * 1024
-            elif tokens == 128:
-                self.prompt_price = 0.01
-                self.completion_price = 0.03
-                self.max_chat_history_tokens = 2 * 1024
-
-            return
-
-        if self.is_gpt35():
-            self.edit_format = "whole"
-            self.always_available = True
-            self.send_undo_reply = False
-
-            if self.name == "gpt-3.5-turbo-1106":
-                self.prompt_price = 0.001
-                self.completion_price = 0.002
-                self.max_chat_history_tokens = 2 * 1024
-            elif tokens == 4:
-                self.prompt_price = 0.0015
-                self.completion_price = 0.002
-                self.max_chat_history_tokens = 1024
-            elif tokens == 16:
-                self.prompt_price = 0.003
-                self.completion_price = 0.004
-                self.max_chat_history_tokens = 2 * 1024
-
-            return
-
-        raise ValueError(f"Unsupported model: {name}")
-
-    def is_gpt4(self):
-        return self.name.startswith("gpt-4")
-
-    def is_gpt35(self):
-        return self.name.startswith("gpt-3.5-turbo")
+
+    def lookup_model_info(self, name):
+        for mi in openai_models:
+            if mi.name == name:
+                return mi
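
The heart of the rewrite is a data-driven lookup: resolve an alias to a dated model name, find its ModelInfo, then mirror every dataclass field onto the model instance with fields() and setattr(). A standalone sketch of that mechanism with an abridged two-entry registry; SketchModel is a local stand-in, not aider's Model base class:

from dataclasses import dataclass, fields

@dataclass
class ModelInfo:
    name: str
    max_context_tokens: int
    prompt_price: float
    completions_price: float
    edit_format: str
    always_available: bool = False
    use_repo_map: bool = False
    send_undo_reply: bool = False

# abridged registry; the commit defines nine entries
openai_models = [
    ModelInfo("gpt-3.5-turbo-0125", 16385, 0.0005, 0.0015, "whole", always_available=True),
    ModelInfo("gpt-4-0613", 8192, 0.03, 0.06, "diff", use_repo_map=True, send_undo_reply=True),
]
openai_aliases = {"gpt-4": "gpt-4-0613"}

class SketchModel:
    def __init__(self, name):
        true_name = openai_aliases.get(name, name)
        info = next((mi for mi in openai_models if mi.name == true_name), None)
        if info is None:
            raise ValueError(f"Unsupported model: {name}")
        # mirror every ModelInfo field onto this instance
        for field in fields(ModelInfo):
            setattr(self, field.name, getattr(info, field.name))
        self.name = name  # restore the caller's spelling, e.g. the alias
        # history limit keyed off the context window, as in the commit
        self.max_chat_history_tokens = 1024 if self.max_context_tokens < 32 * 1024 else 2 * 1024

m = SketchModel("gpt-4")
print(m.name, m.edit_format, m.max_context_tokens)  # gpt-4 diff 8192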

tests/test_models.py

@@ -10,10 +10,10 @@ class TestModels(unittest.TestCase):
         self.assertEqual(model.max_context_tokens, 4 * 1024)
 
         model = Model.create("gpt-3.5-turbo-16k")
-        self.assertEqual(model.max_context_tokens, 16 * 1024)
+        self.assertEqual(model.max_context_tokens, 16385)
 
         model = Model.create("gpt-3.5-turbo-1106")
-        self.assertEqual(model.max_context_tokens, 16 * 1024)
+        self.assertEqual(model.max_context_tokens, 16385)
 
         model = Model.create("gpt-4")
         self.assertEqual(model.max_context_tokens, 8 * 1024)
@@ -21,14 +21,12 @@ class TestModels(unittest.TestCase):
         model = Model.create("gpt-4-32k")
         self.assertEqual(model.max_context_tokens, 32 * 1024)
 
-        model = Model.create("gpt-4-0101")
+        model = Model.create("gpt-4-0613")
         self.assertEqual(model.max_context_tokens, 8 * 1024)
 
-        model = Model.create("gpt-4-32k-2123")
-        self.assertEqual(model.max_context_tokens, 32 * 1024)
-
     def test_openrouter_model_properties(self):
         client = MagicMock()
 
         class ModelData:
             def __init__(self, id, object, context_length, pricing):
                 self.id = id
@@ -36,7 +34,10 @@ class TestModels(unittest.TestCase):
                 self.context_length = context_length
                 self.pricing = pricing
 
-        model_data = ModelData("openai/gpt-4", "model", "8192", {"prompt": "0.00006", "completion": "0.00012"})
+        model_data = ModelData(
+            "openai/gpt-4", "model", "8192", {"prompt": "0.00006", "completion": "0.00012"}
+        )
 
         class ModelList:
             def __init__(self, data):
                 self.data = data
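
A side note on the changed assertions: 16 * 1024 is 16384, while OpenAI documents these gpt-3.5 context windows as 16,385 tokens, so the old expected values were off by one:

old_expected = 16 * 1024  # 16384, the rounded guess the tests used
new_expected = 16385      # the documented context window
print(new_expected - old_expected)  # 1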