mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 00:35:00 +00:00
Merge d2e2b9b3d5
into 3caab85931
This commit is contained in:
commit
f24a58bc91
2 changed files with 81 additions and 0 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
import asyncio
|
||||||
import difflib
|
import difflib
|
||||||
import hashlib
|
import hashlib
|
||||||
import importlib.resources
|
import importlib.resources
|
||||||
|
@ -15,6 +16,7 @@ import json5
|
||||||
import yaml
|
import yaml
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from aider import ollama
|
||||||
from aider.dump import dump # noqa: F401
|
from aider.dump import dump # noqa: F401
|
||||||
from aider.llm import litellm
|
from aider.llm import litellm
|
||||||
from aider.openrouter import OpenRouterModelManager
|
from aider.openrouter import OpenRouterModelManager
|
||||||
|
@ -994,6 +996,16 @@ def register_models(model_settings_fnames):
|
||||||
|
|
||||||
def register_litellm_models(model_fnames):
|
def register_litellm_models(model_fnames):
|
||||||
files_loaded = []
|
files_loaded = []
|
||||||
|
|
||||||
|
# Add available ollama models
|
||||||
|
if os.getenv("OLLAMA_API_BASE"):
|
||||||
|
try:
|
||||||
|
model_def = asyncio.run(ollama.query_available_models())
|
||||||
|
model_info_manager.local_model_metadata.update(model_def)
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(f"Error querying ollama models: {e}")
|
||||||
|
|
||||||
|
# Load from static model database
|
||||||
for model_fname in model_fnames:
|
for model_fname in model_fnames:
|
||||||
if not os.path.exists(model_fname):
|
if not os.path.exists(model_fname):
|
||||||
continue
|
continue
|
||||||
|
|
69
aider/ollama.py
Normal file
69
aider/ollama.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
|
||||||
|
async def query_available_models():
    """Return aider-style metadata for every model the local Ollama server offers.

    Reads the server address from the OLLAMA_API_BASE environment variable,
    lists installed models via the /api/tags endpoint, then fetches each
    model's details concurrently with describe_ollama_model().

    Returns:
        dict: maps "ollama/<model_name>" to a litellm-style metadata dict;
        {} when OLLAMA_API_BASE is unset or the server does not answer 200.
    """
    api_base = os.getenv("OLLAMA_API_BASE")
    if not api_base:
        return {}

    async with aiohttp.ClientSession() as session:
        # Ping the tags endpoint to get model names
        async with session.get(f"{api_base}/api/tags") as response:
            if response.status != 200:
                return {}

            tags = await response.json()
            # Guard against an unexpected payload shape: a 200 response
            # without a "models" list would otherwise raise KeyError.
            model_names = [tag["name"] for tag in tags.get("models") or []]

        # Wait for all model descriptions to complete; each call opens its
        # own session, so this is safe to run concurrently.
        model_descriptions = await asyncio.gather(
            *[describe_ollama_model(model_name) for model_name in model_names]
        )

    # Merge the results into a single dictionary
    result = {}
    for model_desc in model_descriptions:
        result.update(model_desc)

    return result
|
|
||||||
|
|
||||||
|
async def describe_ollama_model(model_name):
    """Build litellm-style metadata for a single Ollama model.

    Queries the Ollama /api/show endpoint for model details and extracts the
    model's native context length.

    Args:
        model_name: bare model name as reported by /api/tags (no "ollama/" prefix).

    Returns:
        dict: {"ollama/<model_name>": {metadata}} on success; {} when the
        request does not answer 200.  context-length fields may be None if
        the server did not report one.
    """
    api_base = os.getenv("OLLAMA_API_BASE")
    context_length = None

    async with aiohttp.ClientSession() as session:
        # Ping the /show endpoint to get context length
        async with session.post(f"{api_base}/api/show", json={"model": model_name}) as response:
            if response.status != 200:
                return {}

            # Renamed from `json` to avoid shadowing the stdlib module name.
            data = await response.json()
            # "model_info" may be absent from the payload; default to {} so
            # the loop below does not iterate over None (TypeError).
            model_info = data.get("model_info") or {}

            # Model native context length is usually stored in a key like
            # "llama.context_length" or "qwen3.context_length"
            for key in model_info:
                if "context_length" in key:
                    context_length = model_info[key]
                    break

    # Local models are free to run, so every cost field is zero; capability
    # flags are conservative defaults.
    return {
        "ollama/"
        + model_name: {
            "max_tokens": context_length,
            "max_input_tokens": context_length,
            "max_output_tokens": context_length,
            "input_cost_per_token": 0,
            "input_cost_per_token_cache_hit": 0,
            "cache_read_input_token_cost": 0,
            "cache_creation_input_token_cost": 0,
            "output_cost_per_token": 0,
            "litellm_provider": "ollama",
            "mode": "chat",
            "supports_function_calling": False,
            "supports_assistant_prefill": False,
            "supports_tool_choice": False,
            "supports_prompt_caching": False,
        }
    }
|
Loading…
Add table
Add a link
Reference in a new issue