Better unknown model warnings

This commit is contained in:
Paul Gauthier 2024-04-22 14:07:32 -07:00
parent f1ce673f78
commit efd3c39e50
4 changed files with 125 additions and 69 deletions

View file

@ -137,10 +137,13 @@ class Coder:
self.main_model = main_model
weak_model = main_model.weak_model
self.io.tool_output(
f"Models: {main_model.name} with {self.edit_format} edit format, weak model"
f" {weak_model.name}"
)
prefix = "Model:"
output = f" {main_model.name} with {self.edit_format} edit format"
if weak_model is not main_model:
prefix = "Models:"
output += f", weak model {weak_model.name}"
self.io.tool_output(prefix + output)
self.show_diffs = show_diffs

View file

@ -21,6 +21,36 @@ os.environ["OR_SITE_URL"] = "http://aider.chat"
os.environ["OR_APP_NAME"] = "Aider"
def sanity_check_model(io, model):
    """Warn the user if the model is misconfigured or unknown.

    Checks that the environment variables required by `model` are set and
    that metadata (context window size, token costs) is available for it,
    printing any warnings via `io.tool_error`.

    :param io: InputOutput-like object providing `tool_error` for warnings.
    :param model: Model-like object exposing `missing_keys`,
        `keys_in_environment`, `info`, and `name` attributes.
    :return: True if no warnings were issued, False otherwise.
    """
    show = False

    if model.missing_keys:
        show = True
        io.tool_error(f"Model {model}: Missing these environment variables:")
        for key in model.missing_keys:
            io.tool_error(f"- {key}")
    elif not model.keys_in_environment:
        # litellm couldn't tell us which env vars this provider needs.
        show = True
        io.tool_error(f"Model {model}: Unknown which environment variables are required.")

    if not model.info:
        show = True
        io.tool_error(
            f"Model {model}: Unknown model, context window size and token costs unavailable."
        )

        # Suggest similarly named models in case of a typo in the model name.
        possible_matches = models.fuzzy_match_models(model.name)
        if possible_matches:
            io.tool_error("Did you mean one of these?")
            for match in possible_matches:
                io.tool_error(f"- {match}")

    if show:
        io.tool_error("For more info see https://aider.chat/docs/llms.html#model-warnings")

    # BUG FIX: the original unconditionally returned False, which made the
    # caller (`if not sanity_check_model(...)`) flag every model as having
    # missing info.  Report whether the model passed all checks instead.
    return not show
def get_git_root():
"""Try and guess the git repo, since the conf.yml can be at the repo root"""
try:
@ -277,7 +307,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
model_group.add_argument(
    "--require-model-info",
    action=argparse.BooleanOptionalAction,
    # This commit relaxed the default to False so unknown models only
    # produce warnings instead of aborting startup.
    default=False,
    # BUG FIX: help text still claimed "(default: True)" after the default
    # was changed to False; keep the documented default in sync.
    help="Only work with models that have meta-data available (default: False)",
)
model_group.add_argument(
@ -602,13 +632,15 @@ def main(argv=None, input=None, output=None, force_git_root=None):
if args.openai_organization_id:
os.environ["OPENAI_ORGANIZATION"] = args.openai_organization_id
# Check in advance that we have model metadata
try:
main_model = models.Model(
args.model, weak_model=args.weak_model, require_model_info=args.require_model_info
)
except (models.NoModelInfo, models.ModelEnvironmentError) as err:
io.tool_error(str(err))
main_model = models.Model(args.model, weak_model=args.weak_model)
missing_model_info = False
if not sanity_check_model(io, main_model):
missing_model_info = True
if main_model.weak_model and main_model.weak_model is not main_model:
if not sanity_check_model(io, main_model.weak_model):
missing_model_info = True
if args.require_model_info and missing_model_info:
return 1
try:

View file

@ -13,24 +13,6 @@ from aider.dump import dump # noqa: F401
DEFAULT_MODEL_NAME = "gpt-4-1106-preview"
class NoModelInfo(Exception):
"""
Exception raised when model information cannot be retrieved.
"""
def __init__(self, model):
super().__init__(check_model_name(model))
class ModelEnvironmentError(Exception):
"""
Exception raised when the environment isn't setup for the model
"""
def __init__(self, message):
super().__init__(message)
@dataclass
class ModelSettings:
name: str
@ -164,30 +146,18 @@ class Model:
max_chat_history_tokens = 1024
weak_model = None
def __init__(self, model, weak_model=None, require_model_info=True, validate_environment=True):
def __init__(self, model, weak_model=None):
self.name = model
# Are all needed keys/params available?
res = litellm.validate_environment(model)
missing_keys = res.get("missing_keys")
keys_in_environment = res.get("keys_in_environment")
if missing_keys:
if validate_environment:
res = f"To use model {model}, please set these environment variables:"
for key in missing_keys:
res += f"- {key}"
raise ModelEnvironmentError(res)
elif not keys_in_environment:
# https://github.com/BerriAI/litellm/issues/3190
print(f"Unable to check environment variables for model {model}")
self.missing_keys = res.get("missing_keys")
self.keys_in_environment = res.get("keys_in_environment")
# Do we have the model_info?
try:
self.info = litellm.get_model_info(model)
except Exception:
if require_model_info:
raise NoModelInfo(model)
self.info = dict()
if self.info.get("max_input_tokens", 0) < 32 * 1024:
@ -199,7 +169,7 @@ class Model:
if weak_model is False:
self.weak_model_name = None
else:
self.get_weak_model(weak_model, require_model_info)
self.get_weak_model(weak_model)
def configure_model_settings(self, model):
for ms in MODEL_SETTINGS:
@ -210,7 +180,9 @@ class Model:
setattr(self, field.name, val)
return # <--
if "llama3" in model and "70b" in model:
model = model.lower()
if ("llama3" in model or "llama-3" in model) and "70b" in model:
self.edit_format = "diff"
self.use_repo_map = True
self.send_undo_reply = True
@ -235,7 +207,7 @@ class Model:
def __str__(self):
    # Represent the model by its litellm name (e.g. "gpt-4-1106-preview"),
    # which is what the warning messages interpolate via f"{model}".
    return self.name
def get_weak_model(self, provided_weak_model_name, require_model_info):
def get_weak_model(self, provided_weak_model_name):
# If weak_model_name is provided, override the model settings
if provided_weak_model_name:
self.weak_model_name = provided_weak_model_name
@ -251,7 +223,6 @@ class Model:
self.weak_model = Model(
self.weak_model_name,
weak_model=False,
require_model_info=require_model_info,
)
return self.weak_model
@ -313,19 +284,6 @@ class Model:
return img.size
def check_model_name(model):
res = f"Unknown model {model}"
possible_matches = fuzzy_match_models(model)
if possible_matches:
res += ", did you mean one of these?"
for match in possible_matches:
res += "\n- " + match
return res
def fuzzy_match_models(name):
models = litellm.model_cost.keys()

View file

@ -29,6 +29,7 @@ So you should expect that models which are less capable than GPT-3.5 may struggl
- [OpenRouter](#openrouter)
- [OpenAI compatible APIs](#openai-compatible-apis)
- [Other LLMs](#other-llms)
- [Model warnings](#model-warnings)
- [Editing format](#editing-format)
## OpenAI
@ -136,15 +137,19 @@ aider --model openrouter/<provider>/<model>
## OpenAI compatible APIs
If your LLM is accessible via an OpenAI compatible API endpoint,
you can use `--openai-api-base` to have aider connect to it.
Aider can connect to any LLM which is accessible via an OpenAI compatible API endpoint.
Use `--openai-api-base` or set the `OPENAI_API_BASE`
environment variable to have aider connect to it.
You might need to use `--no-require-model-info` if aider doesn't
recognize the model you want to use.
For unknown models, aider won't have normal metadata available like
the context window size, token costs, etc.
Some minor functionality will be limited when using such models.
```
export OPENAI_API_BASE=<your-endpoint-goes-here>
export OPENAI_API_KEY=<your-key-goes-here-if-required>
aider --model <model-name>
```
See the [Model warnings](#model-warnings)
section for information on warnings which will occur
when working with models that aider is not familiar with.
## Other LLMs
@ -178,6 +183,64 @@ error message listing which parameters are needed.
See the [list of providers supported by litellm](https://docs.litellm.ai/docs/providers)
for more details.
## Model warnings
On startup, aider tries to sanity check that it is configured correctly
to work with the specified models:
- It checks to see that all required environment variables are set for the model. These variables are required to configure things like API keys, API base URLs, etc.
- It checks a metadata database to look up the context window size and token costs for the model.
Sometimes one or both of these checks will fail, so aider will issue
some of the following warnings.
##### Missing environment variables
You need to set the listed variables.
Otherwise you will get error messages when you attempt to chat with the model.
```
Model azure/gpt-4-turbo: Missing these environment variables:
- AZURE_API_BASE
- AZURE_API_VERSION
- AZURE_API_KEY
```
##### Unknown which environment variables are required
Aider is unable to verify the environment because it doesn't know
which variables are required for the model.
If required variables are missing,
you may get errors when you attempt to chat with the model.
You can look in the
[litellm provider documentation](https://docs.litellm.ai/docs/providers)
to see if the required variables are listed there.
```
Model gpt-5: Unknown which environment variables are required.
```
##### Unknown model, did you mean?
If you specify a model that aider has never heard of, you will get an
"unknown model" warning.
In this case, aider won't have normal metadata available like
the context window size, token costs, etc.
Some minor functionality will be limited when using such models, but
it's not really a significant problem.
Aider will also try to suggest similarly named models,
in case you made a typo or mistake when specifying the model name.
```
Model gpt-5: Unknown model, context window size and token costs unavailable.
Did you mean one of these?
- gpt-4
```
## Editing format