Merge branch 'main' into sitter-map

2025-05-25 06:44:59 +00:00 · 2023-09-01 12:00:48 -07:00 · 2023-09-01 12:00:48 -07:00 · fa6ae80653
commit fa6ae80653
parent d832860599 30a3cc0847
13 changed files with 163 additions and 42 deletions
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@ -183,7 +183,7 @@ class Coder:
        for fname in self.get_inchat_relative_files():
            self.io.tool_output(f"Added {fname} to the chat.")

-        self.summarizer = ChatSummary()
+        self.summarizer = ChatSummary(models.Model.weak_model())
        self.summarizer_thread = None
        self.summarized_done_messages = None

@ -629,7 +629,7 @@ class Coder:
                if len(chunk.choices) == 0:
                    continue

-                if chunk.choices[0].finish_reason == "length":
+                if hasattr(chunk.choices[0], "finish_reason") and chunk.choices[0].finish_reason == "length":
                    raise ExhaustedContextWindow()

                try:
--- a/aider/commands.py
+++ b/aider/commands.py
@ -5,7 +5,6 @@ import sys
 from pathlib import Path

 import git
-import tiktoken
 from prompt_toolkit.completion import Completion

 from aider import prompts, voice
@ -24,7 +23,7 @@ class Commands:
            voice_language = None

        self.voice_language = voice_language
-        self.tokenizer = tiktoken.encoding_for_model(coder.main_model.name)
+        self.tokenizer = coder.main_model.tokenizer

    def is_command(self, inp):
        if inp[0] == "/":
--- a/aider/history.py
+++ b/aider/history.py
@ -9,9 +9,10 @@ from aider.sendchat import simple_send_with_retries


 class ChatSummary:
-    def __init__(self, model=models.GPT35.name, max_tokens=1024):
-        self.tokenizer = tiktoken.encoding_for_model(model)
+    def __init__(self, model=models.Model.weak_model(), max_tokens=1024):
+        self.tokenizer = model.tokenizer
        self.max_tokens = max_tokens
+        self.model = model

    def too_big(self, messages):
        sized = self.tokenize(messages)
@ -85,7 +86,7 @@ class ChatSummary:
            dict(role="user", content=content),
        ]

-        summary = simple_send_with_retries(model=models.GPT35.name, messages=messages)
+        summary = simple_send_with_retries(self.model.name, messages)
        summary = prompts.summary_prefix + summary

        return [dict(role="user", content=summary)]
@ -123,7 +124,7 @@ def main():

        assistant.append(line)

-    summarizer = ChatSummary(models.GPT35.name)
+    summarizer = ChatSummary(models.Model.weak_model())
    summary = summarizer.summarize(messages[-40:])
    dump(summary)

--- a/aider/main.py
+++ b/aider/main.py
@ -449,8 +449,6 @@ def main(argv=None, input=None, output=None, force_git_root=None):
            )
        return 1

-    main_model = models.Model(args.model)
-
    openai.api_key = args.openai_api_key
    for attr in ("base", "type", "version", "deployment_id", "engine"):
        arg_key = f"openai_api_{attr}"
@ -460,6 +458,8 @@ def main(argv=None, input=None, output=None, force_git_root=None):
            setattr(openai, mod_key, val)
            io.tool_output(f"Setting openai.{mod_key}={val}")

+    main_model = models.Model.create(args.model)
+
    try:
        coder = Coder.create(
            main_model,
--- a/aider/models/init.py
+++ b/aider/models/init.py
@ -0,0 +1,7 @@
+from .openai import OpenAIModel
+from .openrouter import OpenRouterModel
+from .model import Model
+# NOTE(review): created at import, so Model.create checks openai.api_base as set at that moment.
+GPT4 = Model.create('gpt-4')
+GPT35 = Model.create('gpt-3.5-turbo')
+GPT35_16k = Model.create('gpt-3.5-turbo-16k')
--- a/aider/models/model.py
+++ b/aider/models/model.py
@ -0,0 +1,36 @@
+import openai 
+class Model:
+    """Base class for OpenAIModel and OpenRouterModel; `create` picks between them."""
+    name = None
+    edit_format = None
+    max_context_tokens = 0
+    tokenizer = None
+    always_available = False
+    use_repo_map = False
+    send_undo_reply = False
+    prompt_price = None
+    completion_price = None
+
+    @classmethod
+    def create(cls, name):
+        """Return OpenRouterModel(name) if openai.api_base contains "openrouter.ai", else OpenAIModel(name)."""
+        from .openai import OpenAIModel  # deferred: .openai and .openrouter import Model from here
+        from .openrouter import OpenRouterModel
+        if ("openrouter.ai" in openai.api_base):
+            return OpenRouterModel(name)
+        return OpenAIModel(name)
+
+    def __str__(self):
+        return self.name
+
+    @staticmethod
+    def strong_model():
+        return Model.create('gpt-4')
+
+    @staticmethod
+    def weak_model():
+        return Model.create('gpt-3.5-turbo')
+
+    @staticmethod
+    def commit_message_models():
+        return [Model.create('gpt-3.5-turbo'), Model.create('gpt-3.5-turbo-16k')]
--- a/aider/models/openai.py
+++ b/aider/models/openai.py
@ -1,4 +1,6 @@
+import tiktoken
 import re
+from .model import Model

 known_tokens = {
    "gpt-3.5-turbo": 4,
@ -6,14 +8,7 @@ known_tokens = {
 }


-class Model:
-    always_available = False
-    use_repo_map = False
-    send_undo_reply = False
-
-    prompt_price = None
-    completion_price = None
-
+class OpenAIModel(Model):
    def __init__(self, name):
        self.name = name

@ -31,6 +26,7 @@ class Model:
            raise ValueError(f"Unknown context window size for model: {name}")

        self.max_context_tokens = tokens * 1024
+        self.tokenizer = tiktoken.encoding_for_model(name)

        if self.is_gpt4():
            self.edit_format = "diff"
@ -66,11 +62,3 @@ class Model:

    def is_gpt35(self):
        return self.name.startswith("gpt-3.5-turbo")
-
-    def __str__(self):
-        return self.name
-
-
-GPT4 = Model("gpt-4")
-GPT35 = Model("gpt-3.5-turbo")
-GPT35_16k = Model("gpt-3.5-turbo-16k")
--- a/aider/models/openrouter.py
+++ b/aider/models/openrouter.py
@ -0,0 +1,43 @@
+import openai
+import tiktoken
+from .model import Model
+
+cached_model_details = None
+
+
+class OpenRouterModel(Model):
+    """Model whose context size and prices are read from `openai.Model.list()`."""
+
+    def __init__(self, name):
+        """Look `name` up in the model list; raise ValueError if no entry has that id."""
+        # Bare OpenAI names are looked up under their "openai/"-prefixed id.
+        if name in ('gpt-4', 'gpt-3.5-turbo', 'gpt-3.5-turbo-16k'):
+            name = f'openai/{name}'
+
+        self.name = name
+        self.edit_format = edit_format_for_model(name)
+        self.use_repo_map = self.edit_format == "diff"
+
+        # TODO: figure out proper encodings for non openai models
+        self.tokenizer = tiktoken.get_encoding("cl100k_base")
+
+        # The list is fetched once and kept in a module global for later instances.
+        global cached_model_details
+        if cached_model_details is None:
+            cached_model_details = openai.Model.list().data
+        found = next((details for details in cached_model_details if details.get('id') == name), None)
+        if not found:
+            raise ValueError(f'invalid openrouter model: {name}')
+
+        pricing = found.get('pricing')
+        self.max_context_tokens = int(found.get('context_length'))
+        self.prompt_price = round(float(pricing.get('prompt')) * 1000, 6)
+        self.completion_price = round(float(pricing.get('completion')) * 1000, 6)
+
+
+# TODO run benchmarks and figure out which models support which edit-formats
+def edit_format_for_model(name):
+    """Return "diff" when `name` contains 'gpt-4' or 'claude-2', otherwise "whole"."""
+    if any(marker in name for marker in ('gpt-4', 'claude-2')):
+        return "diff"
+    return "whole"
--- a/aider/repo.py
+++ b/aider/repo.py
@ -109,8 +109,8 @@ class GitRepo:
            dict(role="user", content=content),
        ]

-        for model in [models.GPT35.name, models.GPT35_16k.name]:
-            commit_message = simple_send_with_retries(model, messages)
+        for model in models.Model.commit_message_models():
+            commit_message = simple_send_with_retries(model.name, messages)
            if commit_message:
                break

--- a/aider/repomap.py
+++ b/aider/repomap.py
@ -6,7 +6,6 @@ from collections import Counter, defaultdict, namedtuple
 from pathlib import Path

 import networkx as nx
-import tiktoken
 from diskcache import Cache
 from grep_ast import TreeContext
 from tqdm import tqdm
@ -87,7 +86,7 @@ class RepoMap:
        self,
        map_tokens=1024,
        root=None,
-        main_model=models.GPT4,
+        main_model=models.Model.strong_model(),
        io=None,
        repo_content_prefix=None,
        verbose=False,
@ -103,7 +102,7 @@ class RepoMap:

        self.max_map_tokens = map_tokens

-        self.tokenizer = tiktoken.encoding_for_model(main_model.name)
+        self.tokenizer = main_model.tokenizer
        self.repo_content_prefix = repo_content_prefix

    def get_repo_map(self, chat_files, other_files):
--- a/aider/sendchat.py
+++ b/aider/sendchat.py
@ -34,9 +34,9 @@ CACHE = None
        f"{details.get('exception','Exception')}\nRetry in {details['wait']:.1f} seconds."
    ),
 )
-def send_with_retries(model, messages, functions, stream):
+def send_with_retries(model_name, messages, functions, stream):
    kwargs = dict(
-        model=model,
+        model=model_name,
        messages=messages,
        temperature=0,
        stream=stream,
@ -50,6 +50,12 @@ def send_with_retries(model, messages, functions, stream):
    if hasattr(openai, "api_engine"):
        kwargs["engine"] = openai.api_engine

+    if "openrouter.ai" in openai.api_base:
+        kwargs["headers"] = {
+            "HTTP-Referer": "http://aider.chat",
+            "X-Title": "Aider"
+        }
+
    key = json.dumps(kwargs, sort_keys=True).encode()

    # Generate SHA1 hash of kwargs and append it to chat_completion_call_hashes
@ -66,10 +72,10 @@ def send_with_retries(model, messages, functions, stream):
    return hash_object, res


-def simple_send_with_retries(model, messages):
+def simple_send_with_retries(model_name, messages):
    try:
        _hash, response = send_with_retries(
-            model=model,
+            model_name=model_name,
            messages=messages,
            functions=None,
            stream=False,
--- a/docs/faq.md
+++ b/docs/faq.md
@ -4,6 +4,7 @@
 - [How does aider use git?](#how-does-aider-use-git)
 - [GPT-4 vs GPT-3.5](#gpt-4-vs-gpt-35)
 - [Aider isn't editing my files?](#aider-isnt-editing-my-files)
+- [Accessing other LLMs with OpenRouter](#accessing-other-llms-with-openrouter)
 - [Can I use aider with other LLMs, local LLMs, etc?](#can-i-use-aider-with-other-llms-local-llms-etc)
 - [Can I change the system prompts that aider uses?](#can-i-change-the-system-prompts-that-aider-uses)
 - [Can I run aider in Google Colab?](#can-i-run-aider-in-google-colab)
@ -112,6 +113,19 @@ In these cases, here are some things you might try:
  - Use `/drop` to remove files from the chat session which aren't needed for the task at hand. This will reduce distractions and may help GPT produce properly formatted edits.
  - Use `/clear` to remove the conversation history, again to help GPT focus.

+## Accessing other LLMs with OpenRouter
+
+[OpenRouter](https://openrouter.ai) provides an interface to [many models](https://openrouter.ai/docs) that are not widely accessible, in particular gpt-4-32k and claude-2.
+
+To access the OpenRouter models:
+
+- register for an account, purchase some credits, and generate an API key
+- set `--openai-api-base` to `https://openrouter.ai/api/v1`
+- set `--openai-api-key` to your OpenRouter key
+- set `--model` to the model of your choice (`openai/gpt-4-32k`, `anthropic/claude-2`, etc.)
+
+Some of the models weren't very functional, and each LLM has its own quirks. The Anthropic models work OK, but the llama-2 ones in particular will need more work to play nicely with aider.
+
 ## Can I use aider with other LLMs, local LLMs, etc?

 Aider provides experimental support for LLMs other than OpenAI's GPT-3.5 and GPT-4. The support is currently only experimental for two reasons:
--- a/tests/test_models.py
+++ b/tests/test_models.py
@ -1,28 +1,56 @@
 import unittest
+from unittest.mock import patch

-from aider.models import Model
+from aider.models import Model, OpenRouterModel


 class TestModels(unittest.TestCase):
    def test_max_context_tokens(self):
-        model = Model("gpt-3.5-turbo")
+        model = Model.create("gpt-3.5-turbo")
        self.assertEqual(model.max_context_tokens, 4 * 1024)

-        model = Model("gpt-3.5-turbo-16k")
+        model = Model.create("gpt-3.5-turbo-16k")
        self.assertEqual(model.max_context_tokens, 16 * 1024)

-        model = Model("gpt-4")
+        model = Model.create("gpt-4")
        self.assertEqual(model.max_context_tokens, 8 * 1024)

-        model = Model("gpt-4-32k")
+        model = Model.create("gpt-4-32k")
        self.assertEqual(model.max_context_tokens, 32 * 1024)

-        model = Model("gpt-4-0101")
+        model = Model.create("gpt-4-0101")
        self.assertEqual(model.max_context_tokens, 8 * 1024)

-        model = Model("gpt-4-32k-2123")
+        model = Model.create("gpt-4-32k-2123")
        self.assertEqual(model.max_context_tokens, 32 * 1024)


+    @patch('openai.Model.list')
+    def test_openrouter_model_properties(self, mock_model_list):
+        """OpenRouterModel("gpt-4") resolves to 'openai/gpt-4' with listed size and prices."""
+        import openai
+        # Stand-in for the list response: the model code only reads its `.data` attribute.
+        listing = [
+            {
+                'id': 'openai/gpt-4',
+                'object': 'model',
+                'context_length': '8192',
+                'pricing': {'prompt': '0.00006', 'completion': '0.00012'},
+            }
+        ]
+        mock_model_list.return_value = type('', (), {'data': listing})()
+
+        old_base = openai.api_base
+        openai.api_base = 'https://openrouter.ai/api/v1'
+        try:
+            model = OpenRouterModel("gpt-4")
+            self.assertEqual(model.name, 'openai/gpt-4')
+            self.assertEqual(model.max_context_tokens, 8192)
+            self.assertEqual(model.prompt_price, 0.06)
+            self.assertEqual(model.completion_price, 0.12)
+        finally:
+            # Restore even when an assertion fails so later tests see the original api_base.
+            openai.api_base = old_base
+
 if __name__ == "__main__":
    unittest.main()