Better unknown model warnings

2025-06-01 10:14:59 +00:00 · 2024-04-22 14:07:32 -07:00 · 2024-04-22 14:07:32 -07:00 · efd3c39e50
commit efd3c39e50
parent f1ce673f78
4 changed files with 125 additions and 69 deletions
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@ -137,10 +137,13 @@ class Coder:
        self.main_model = main_model
        weak_model = main_model.weak_model
-        self.io.tool_output(
+        prefix = "Model:"
-            f"Models: {main_model.name} with {self.edit_format} edit format, weak model"
+        output = f" {main_model.name} with {self.edit_format} edit format"
-            f" {weak_model.name}"
+        if weak_model is not main_model:
-        )
+            prefix = "Models:"
            output += f", weak model {weak_model.name}"
        self.io.tool_output(prefix + output)
        self.show_diffs = show_diffs
--- a/aider/main.py
+++ b/aider/main.py
@ -21,6 +21,36 @@ os.environ["OR_SITE_URL"] = "http://aider.chat"
 os.environ["OR_APP_NAME"] = "Aider"
 def sanity_check_model(io, model):
    """
    Warn about configuration problems with `model` and report whether its
    metadata is available.

    Two independent checks are performed, each reported via `io.tool_error`:

    - Environment: lists `model.missing_keys` if any are missing, otherwise
      warns when `model.keys_in_environment` is empty (the required
      variables could not be determined).
    - Metadata: warns when `model.info` is empty and suggests similarly
      named models from `models.fuzzy_match_models`.

    If any warning was issued, a link to the model-warnings docs is printed.

    Returns True when `model.info` is available and False otherwise.
    Environment warnings are advisory only and do not affect the result;
    callers negate the result to decide whether model info is missing.
    """
    warned = False

    if model.missing_keys:
        warned = True
        io.tool_error(f"Model {model}: Missing these environment variables:")
        for key in model.missing_keys:
            io.tool_error(f"- {key}")
    elif not model.keys_in_environment:
        warned = True
        io.tool_error(f"Model {model}: Unknown which environment variables are required.")

    has_model_info = bool(model.info)
    if not has_model_info:
        warned = True
        io.tool_error(
            f"Model {model}: Unknown model, context window size and token costs unavailable."
        )
        possible_matches = models.fuzzy_match_models(model.name)
        if possible_matches:
            io.tool_error("Did you mean one of these?")
            for match in possible_matches:
                io.tool_error(f"- {match}")

    if warned:
        io.tool_error("For more info see https://aider.chat/docs/llms.html#model-warnings")

    # Previously this returned False unconditionally, which made every model
    # look like it was missing metadata to callers that test the result.
    return has_model_info
 def get_git_root():
    """Try and guess the git repo, since the conf.yml can be at the repo root"""
    try:
@ -277,7 +307,7 @@ def main(argv=None, input=None, output=None, force_git_root=None):
    model_group.add_argument(
        "--require-model-info",
        action=argparse.BooleanOptionalAction,
-        default=True,
+        default=False,
        help="Only work with models that have meta-data available (default: True)",
    )
    model_group.add_argument(
@ -602,13 +632,15 @@ def main(argv=None, input=None, output=None, force_git_root=None):
    if args.openai_organization_id:
        os.environ["OPENAI_ORGANIZATION"] = args.openai_organization_id
-    # Check in advance that we have model metadata
+    main_model = models.Model(args.model, weak_model=args.weak_model)
-    try:
+
-        main_model = models.Model(
+    missing_model_info = False
-            args.model, weak_model=args.weak_model, require_model_info=args.require_model_info
+    if not sanity_check_model(io, main_model):
-        )
+        missing_model_info = True
-    except (models.NoModelInfo, models.ModelEnvironmentError) as err:
+    if main_model.weak_model and main_model.weak_model is not main_model:
-        io.tool_error(str(err))
+        if not sanity_check_model(io, main_model.weak_model):
            missing_model_info = True
    if args.require_model_info and missing_model_info:
        return 1
    try:
--- a/aider/models.py
+++ b/aider/models.py
@ -13,24 +13,6 @@ from aider.dump import dump  # noqa: F401
 DEFAULT_MODEL_NAME = "gpt-4-1106-preview"
 class NoModelInfo(Exception):
    """
    Raised when no metadata can be retrieved for a model.

    The exception message is whatever `check_model_name` returns for the
    given model name.
    """

    def __init__(self, model):
        message = check_model_name(model)
        super().__init__(message)
 class ModelEnvironmentError(Exception):
    """
    Raised when the environment is not set up for the requested model.

    `message` becomes the exception's only argument and its string form.
    """

    def __init__(self, message):
        super(ModelEnvironmentError, self).__init__(message)
@dataclass
 class ModelSettings:
    name: str
@ -164,30 +146,18 @@ class Model:
    max_chat_history_tokens = 1024
    weak_model = None
-    def __init__(self, model, weak_model=None, require_model_info=True, validate_environment=True):
+    def __init__(self, model, weak_model=None):
        self.name = model
        # Are all needed keys/params available?
        res = litellm.validate_environment(model)
-        missing_keys = res.get("missing_keys")
+        self.missing_keys = res.get("missing_keys")
-        keys_in_environment = res.get("keys_in_environment")
+        self.keys_in_environment = res.get("keys_in_environment")
        if missing_keys:
            if validate_environment:
                res = f"To use model {model}, please set these environment variables:"
                for key in missing_keys:
                    res += f"- {key}"
                raise ModelEnvironmentError(res)
        elif not keys_in_environment:
            # https://github.com/BerriAI/litellm/issues/3190
            print(f"Unable to check environment variables for model {model}")
        # Do we have the model_info?
        try:
            self.info = litellm.get_model_info(model)
        except Exception:
            if require_model_info:
                raise NoModelInfo(model)
            self.info = dict()
        if self.info.get("max_input_tokens", 0) < 32 * 1024:
@ -199,7 +169,7 @@ class Model:
        if weak_model is False:
            self.weak_model_name = None
        else:
-            self.get_weak_model(weak_model, require_model_info)
+            self.get_weak_model(weak_model)
    def configure_model_settings(self, model):
        for ms in MODEL_SETTINGS:
@ -210,7 +180,9 @@ class Model:
                    setattr(self, field.name, val)
                return  # <--
-        if "llama3" in model and "70b" in model:
+        model = model.lower()
        if ("llama3" in model or "llama-3" in model) and "70b" in model:
            self.edit_format = "diff"
            self.use_repo_map = True
            self.send_undo_reply = True
@ -235,7 +207,7 @@ class Model:
    def __str__(self):
        """Return the model's name as this object's string form."""
        return self.name
-    def get_weak_model(self, provided_weak_model_name, require_model_info):
+    def get_weak_model(self, provided_weak_model_name):
        # If weak_model_name is provided, override the model settings
        if provided_weak_model_name:
            self.weak_model_name = provided_weak_model_name
@ -251,7 +223,6 @@ class Model:
        self.weak_model = Model(
            self.weak_model_name,
            weak_model=False,
            require_model_info=require_model_info,
        )
        return self.weak_model
@ -313,19 +284,6 @@ class Model:
            return img.size
 def check_model_name(model):
    """
    Build an "Unknown model" message for `model`.

    When `fuzzy_match_models` returns any candidates, the message also asks
    "did you mean one of these?" and lists each candidate on its own
    "- "-prefixed line.
    """
    lines = [f"Unknown model {model}"]
    suggestions = fuzzy_match_models(model)
    if suggestions:
        lines[0] += ", did you mean one of these?"
        for candidate in suggestions:
            lines.append("- " + candidate)
    return "\n".join(lines)
 def fuzzy_match_models(name):
    models = litellm.model_cost.keys()
--- a/docs/llms.md
+++ b/docs/llms.md
@ -29,6 +29,7 @@ So you should expect that models which are less capable than GPT-3.5 may struggl
 - [OpenRouter](#openrouter)
 - [OpenAI compatible APIs](#openai-compatible-apis)
 - [Other LLMs](#other-llms)
 - [Model warnings](#model-warnings)
 - [Editing format](#editing-format)
 ## OpenAI
@ -136,15 +137,19 @@ aider --model openrouter/<provider>/<model>
 ## OpenAI compatible APIs
-If your LLM is accessible via an OpenAI compatible API endpoint,
+Aider can connect to any LLM which is accessible via an OpenAI compatible API endpoint.
-you can use `--openai-api-base` to have aider connect to it.
+Use `--openai-api-base` or set the `OPENAI_API_BASE`
 environment variable to have aider connect to it.
-You might need to use `--no-require-model-info` if aider doesn't
+```
-recognize the model you want to use.
+export OPENAI_API_BASE=<your-endpoint-goes-here>
-For unknown models, aider won't have normal metadata available like
+export OPENAI_API_KEY=<your-key-goes-here-if-required>
-the context window size, token costs, etc.
+aider --model <model-name>
-Some minor functionality will be limited when using such models.
+```
 See the [Model warnings](#model-warnings)
 section for information on warnings which will occur
 when working with models that aider is not familiar with.
 ## Other LLMs
@ -178,6 +183,64 @@ error message listing which parameters are needed.
 See the [list of providers supported by litellm](https://docs.litellm.ai/docs/providers)
 for more details.
 ## Model warnings
 On startup, aider tries to sanity check that it is configured correctly
 to work with the specified models:
 - It checks to see that all required environment variables are set for the model. These variables are required to configure things like API keys, API base URLs, etc.
 - It checks a metadata database to look up the context window size and token costs for the model.
 Sometimes one or both of these checks will fail, so aider will issue
 some of the following warnings.
 ##### Missing environment variables
 You need to set the listed variables.
 Otherwise you will get error messages when you attempt to chat with the model.
 ```
 Model azure/gpt-4-turbo: Missing these environment variables:
 - AZURE_API_BASE
 - AZURE_API_VERSION
 - AZURE_API_KEY
 ```
 ##### Unknown which environment variables are required
 Aider is unable to verify the environment because it doesn't know
 which variables are required for the model.
 If required variables are missing,
 you may get errors when you attempt to chat with the model.
 You can look in the
 [litellm provider documentation](https://docs.litellm.ai/docs/providers)
 to see if the required variables are listed there.
 ```
 Model gpt-5: Unknown which environment variables are required.
 ```
 ##### Unknown model, did you mean?
 If you specify a model that aider has never heard of, you will get an
 "unknown model" warning.
 In this case, aider won't have normal metadata available like
 the context window size, token costs, etc.
 Some minor functionality will be limited when using such models, but
 it's not really a significant problem.
 Aider will also try to suggest similarly named models,
 in case you made a typo or mistake when specifying the model name.
 ```
 Model gpt-5: Unknown model, context window size and token costs unavailable.
 Did you mean one of these?
 - gpt-4
 ```
 ## Editing format