diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index a2ccde829..d103396d9 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -190,7 +190,7 @@ class Coder:
         for fname in self.get_inchat_relative_files():
             self.io.tool_output(f"Added {fname} to the chat.")
 
-        self.summarizer = ChatSummary()
+        self.summarizer = ChatSummary(models.Model.weak_model())
         self.summarizer_thread = None
         self.summarized_done_messages = None
 
@@ -636,7 +636,7 @@ class Coder:
             if len(chunk.choices) == 0:
                 continue
 
-            if chunk.choices[0].finish_reason == "length":
+            if hasattr(chunk.choices[0], "finish_reason") and chunk.choices[0].finish_reason == "length":
                 raise ExhaustedContextWindow()
 
             try:
diff --git a/aider/commands.py b/aider/commands.py
index c05bd1204..a4caf7142 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -5,7 +5,6 @@ import sys
 from pathlib import Path
 
 import git
-import tiktoken
 from prompt_toolkit.completion import Completion
 
 from aider import prompts, voice
@@ -24,7 +23,7 @@ class Commands:
             voice_language = None
 
         self.voice_language = voice_language
-        self.tokenizer = tiktoken.encoding_for_model(coder.main_model.name)
+        self.tokenizer = coder.main_model.tokenizer
 
     def is_command(self, inp):
         if inp[0] == "/":
diff --git a/aider/history.py b/aider/history.py
index 912a19108..36d148344 100644
--- a/aider/history.py
+++ b/aider/history.py
@@ -9,9 +9,14 @@
 
 
 class ChatSummary:
-    def __init__(self, model=models.GPT35.name, max_tokens=1024):
-        self.tokenizer = tiktoken.encoding_for_model(model)
+    def __init__(self, model=None, max_tokens=1024):
+        # don't call Model.weak_model() in the signature: a default argument
+        # is evaluated at import time, before openai.api_base is configured
+        if model is None:
+            model = models.Model.weak_model()
+        self.tokenizer = model.tokenizer
         self.max_tokens = max_tokens
+        self.model = model
 
     def too_big(self, messages):
         sized = self.tokenize(messages)
@@ -85,7 +90,7 @@ class ChatSummary:
             dict(role="user", content=content),
         ]
 
-        summary = simple_send_with_retries(model=models.GPT35.name, messages=messages)
+        summary = simple_send_with_retries(self.model.name, messages)
         summary = prompts.summary_prefix + summary
 
         return [dict(role="user", content=summary)]
@@ -123,7 +128,7 @@ def main():
 
         assistant.append(line)
 
-    summarizer = ChatSummary(models.GPT35.name)
+    summarizer = ChatSummary(models.Model.weak_model())
     summary = summarizer.summarize(messages[-40:])
     dump(summary)
 
diff --git a/aider/main.py b/aider/main.py
index 8d9b384b0..755e118bb 100644
--- a/aider/main.py
+++ b/aider/main.py
@@ -449,8 +449,6 @@ def main(argv=None, input=None, output=None, force_git_root=None):
         )
         return 1
 
-    main_model = models.Model(args.model)
-
    openai.api_key = args.openai_api_key
     for attr in ("base", "type", "version", "deployment_id", "engine"):
         arg_key = f"openai_api_{attr}"
@@ -460,6 +458,8 @@
         setattr(openai, mod_key, val)
         io.tool_output(f"Setting openai.{mod_key}={val}")
 
+    main_model = models.Model.create(args.model)
+
     try:
         coder = Coder.create(
             main_model,
diff --git a/aider/models/__init__.py b/aider/models/__init__.py
new file mode 100644
index 000000000..f5a6fa7a8
--- /dev/null
+++ b/aider/models/__init__.py
@@ -0,0 +1,7 @@
+from .openai import OpenAIModel
+from .openrouter import OpenRouterModel
+from .model import Model
+
+GPT4 = Model.create('gpt-4')
+GPT35 = Model.create('gpt-3.5-turbo')
+GPT35_16k = Model.create('gpt-3.5-turbo-16k')
diff --git a/aider/models/model.py b/aider/models/model.py
new file mode 100644
index 000000000..f7169557e
--- /dev/null
+++ b/aider/models/model.py
@@ -0,0 +1,48 @@
+import openai
+
+
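+# Base class for all chat models. A sketch of the intended use (illustrative):
+#
+#   model = Model.create('gpt-4')
+#   model.max_context_tokens   # 8192
+#
+# Use the Model.create() factory rather than instantiating a subclass
+# directly; it inspects openai.api_base to pick the right provider-specific
+# subclass.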
+class Model:
+    name = None
+    edit_format = None
+    max_context_tokens = 0
+    tokenizer = None
+
+    always_available = False
+    use_repo_map = False
+    send_undo_reply = False
+
+    prompt_price = None
+    completion_price = None
+
+    @classmethod
+    def create(cls, name):
+        # imported here, not at module load, to avoid a circular import
+        from .openai import OpenAIModel
+        from .openrouter import OpenRouterModel
+
+        if "openrouter.ai" in openai.api_base:
+            return OpenRouterModel(name)
+        return OpenAIModel(name)
+
+    def __str__(self):
+        return self.name
+
+    @staticmethod
+    def strong_model():
+        return Model.create('gpt-4')
+
+    @staticmethod
+    def weak_model():
+        return Model.create('gpt-3.5-turbo')
+
+    @staticmethod
+    def commit_message_models():
+        return [Model.create('gpt-3.5-turbo'), Model.create('gpt-3.5-turbo-16k')]
diff --git a/aider/models.py b/aider/models/openai.py
similarity index 83%
rename from aider/models.py
rename to aider/models/openai.py
index fdbb2d152..d25156758 100644
--- a/aider/models.py
+++ b/aider/models/openai.py
@@ -1,4 +1,6 @@
+import tiktoken
 import re
+from .model import Model
 
 known_tokens = {
     "gpt-3.5-turbo": 4,
@@ -6,14 +8,7 @@
 }
 
 
-class Model:
-    always_available = False
-    use_repo_map = False
-    send_undo_reply = False
-
-    prompt_price = None
-    completion_price = None
-
+class OpenAIModel(Model):
     def __init__(self, name):
         self.name = name
 
@@ -31,6 +26,7 @@
             raise ValueError(f"Unknown context window size for model: {name}")
 
         self.max_context_tokens = tokens * 1024
+        self.tokenizer = tiktoken.encoding_for_model(name)
 
         if self.is_gpt4():
             self.edit_format = "diff"
@@ -66,11 +62,3 @@
 
     def is_gpt35(self):
         return self.name.startswith("gpt-3.5-turbo")
-
-    def __str__(self):
-        return self.name
-
-
-GPT4 = Model("gpt-4")
-GPT35 = Model("gpt-3.5-turbo")
-GPT35_16k = Model("gpt-3.5-turbo-16k")
diff --git a/aider/models/openrouter.py b/aider/models/openrouter.py
new file mode 100644
index 000000000..8306c136f
--- /dev/null
+++ b/aider/models/openrouter.py
@@ -0,0 +1,54 @@
+import openai
+import tiktoken
+
+from .model import Model
+
+cached_model_details = None
+
+
+class OpenRouterModel(Model):
+    def __init__(self, name):
+        # OpenRouter serves the OpenAI models under an 'openai/' prefix
+        if name == 'gpt-4':
+            name = 'openai/gpt-4'
+        elif name == 'gpt-3.5-turbo':
+            name = 'openai/gpt-3.5-turbo'
+        elif name == 'gpt-3.5-turbo-16k':
+            name = 'openai/gpt-3.5-turbo-16k'
+
+        self.name = name
+        self.edit_format = edit_format_for_model(name)
+        self.use_repo_map = self.edit_format == "diff"
+
+        # TODO: figure out proper encodings for non-OpenAI models
+        self.tokenizer = tiktoken.get_encoding("cl100k_base")
+
+        global cached_model_details
+        if cached_model_details is None:
+            cached_model_details = openai.Model.list().data
+        found = next(
+            (details for details in cached_model_details if details.get('id') == name), None
+        )
+
+        if found:
+            self.max_context_tokens = int(found.get('context_length'))
+            self.prompt_price = round(float(found.get('pricing').get('prompt')) * 1000, 6)
+            self.completion_price = round(float(found.get('pricing').get('completion')) * 1000, 6)
+        else:
+            raise ValueError(f'invalid openrouter model: {name}')
+
+
+# TODO: run benchmarks and figure out which models support which edit formats
+def edit_format_for_model(name):
+    if any(substr in name for substr in ['gpt-4', 'claude-2']):
+        return "diff"
+
+    return "whole"
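+
+
+# A usage sketch (illustrative): once openai.api_base points at OpenRouter,
+# the Model.create() factory returns instances of this class:
+#
+#   openai.api_base = 'https://openrouter.ai/api/v1'
+#   model = Model.create('gpt-4')   # name resolves to 'openai/gpt-4'
+#   model.edit_format               # 'diff'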
diff --git a/aider/repo.py b/aider/repo.py
index 6b0c0b089..a44f235b0 100644
--- a/aider/repo.py
+++ b/aider/repo.py
@@ -109,8 +109,8 @@ class GitRepo:
             dict(role="user", content=content),
         ]
 
-        for model in [models.GPT35.name, models.GPT35_16k.name]:
-            commit_message = simple_send_with_retries(model, messages)
+        for model in models.Model.commit_message_models():
+            commit_message = simple_send_with_retries(model.name, messages)
             if commit_message:
                 break
 
diff --git a/aider/repomap.py b/aider/repomap.py
index 5f9ae3a8a..1b9f8a005 100644
--- a/aider/repomap.py
+++ b/aider/repomap.py
@@ -9,7 +9,6 @@ from collections import Counter, defaultdict
 from pathlib import Path
 
 import networkx as nx
-import tiktoken
 from diskcache import Cache
 from pygments.lexers import guess_lexer_for_filename
 from pygments.token import Token
@@ -81,7 +80,7 @@ class RepoMap:
         self,
         map_tokens=1024,
         root=None,
-        main_model=models.GPT4,
+        main_model=models.Model.strong_model(),
         io=None,
         repo_content_prefix=None,
         verbose=False,
@@ -104,7 +103,7 @@ class RepoMap:
         else:
             self.use_ctags = False
 
-        self.tokenizer = tiktoken.encoding_for_model(main_model.name)
+        self.tokenizer = main_model.tokenizer
         self.repo_content_prefix = repo_content_prefix
 
     def get_repo_map(self, chat_files, other_files):
diff --git a/aider/sendchat.py b/aider/sendchat.py
index 5025d42dd..2269e512c 100644
--- a/aider/sendchat.py
+++ b/aider/sendchat.py
@@ -34,9 +34,9 @@ CACHE = None
         f"{details.get('exception','Exception')}\nRetry in {details['wait']:.1f} seconds."
     ),
 )
-def send_with_retries(model, messages, functions, stream):
+def send_with_retries(model_name, messages, functions, stream):
     kwargs = dict(
-        model=model,
+        model=model_name,
         messages=messages,
         temperature=0,
         stream=stream,
@@ -50,6 +50,12 @@
     if hasattr(openai, "api_engine"):
         kwargs["engine"] = openai.api_engine
 
+    if "openrouter.ai" in openai.api_base:
+        kwargs["headers"] = {
+            "HTTP-Referer": "http://aider.chat",
+            "X-Title": "Aider"
+        }
+
     key = json.dumps(kwargs, sort_keys=True).encode()
 
     # Generate SHA1 hash of kwargs and append it to chat_completion_call_hashes
@@ -66,10 +72,10 @@
     return hash_object, res
 
 
-def simple_send_with_retries(model, messages):
+def simple_send_with_retries(model_name, messages):
     try:
         _hash, response = send_with_retries(
-            model=model,
+            model_name=model_name,
             messages=messages,
             functions=None,
             stream=False,
diff --git a/docs/faq.md b/docs/faq.md
index e89e42724..bd71fe4bf 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -4,6 +4,7 @@
 - [How does aider use git?](#how-does-aider-use-git)
 - [GPT-4 vs GPT-3.5](#gpt-4-vs-gpt-35)
 - [Aider isn't editing my files?](#aider-isnt-editing-my-files)
+- [Accessing other LLMs with OpenRouter](#accessing-other-llms-with-openrouter)
 - [Can I use aider with other LLMs, local LLMs, etc?](#can-i-use-aider-with-other-llms-local-llms-etc)
 - [Can I change the system prompts that aider uses?](#can-i-change-the-system-prompts-that-aider-uses)
 - [Can I run aider in Google Colab?](#can-i-run-aider-in-google-colab)
@@ -112,6 +113,28 @@ In these cases, here are some things you might try:
 - Use `/drop` to remove files from the chat session which aren't needed for the task at hand. This will reduce distractions and may help GPT produce properly formatted edits.
 - Use `/clear` to remove the conversation history, again to help GPT focus.
 
+## Accessing other LLMs with OpenRouter
+
+[OpenRouter](https://openrouter.ai) provides an interface to [many models](https://openrouter.ai/docs) which are not otherwise widely accessible, in particular gpt-4-32k and claude-2.
+
+To access the OpenRouter models:
+
+- register for an account, purchase some credits and generate an API key
+- set `--openai-api-base` to `https://openrouter.ai/api/v1`
+- set `--openai-api-key` to your OpenRouter key
+- set `--model` to the model of your choice (`openai/gpt-4-32k`, `anthropic/claude-2`, etc.)
+
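+For example, to chat with Claude 2 (assuming your key is stored in the
+OPENROUTER_API_KEY environment variable):
+
+```
+aider --openai-api-base https://openrouter.ai/api/v1 \
+      --openai-api-key $OPENROUTER_API_KEY \
+      --model anthropic/claude-2
+```
+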
+Not all of these models are fully functional with aider yet, and each LLM has its own quirks. The Anthropic models work well, but the llama-2 models in particular will need more work to play nicely with aider.
+
 ## Can I use aider with other LLMs, local LLMs, etc?
 
 Aider provides experimental support for LLMs other than OpenAI's GPT-3.5 and GPT-4. The support is currently only experimental for two reasons:
diff --git a/tests/test_models.py b/tests/test_models.py
index af2a6f8d7..6856a9827 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,28 +1,61 @@
 import unittest
+from types import SimpleNamespace
+from unittest.mock import patch
 
-from aider.models import Model
+from aider.models import Model, OpenRouterModel
 
 
 class TestModels(unittest.TestCase):
     def test_max_context_tokens(self):
-        model = Model("gpt-3.5-turbo")
+        model = Model.create("gpt-3.5-turbo")
         self.assertEqual(model.max_context_tokens, 4 * 1024)
 
-        model = Model("gpt-3.5-turbo-16k")
+        model = Model.create("gpt-3.5-turbo-16k")
         self.assertEqual(model.max_context_tokens, 16 * 1024)
 
-        model = Model("gpt-4")
+        model = Model.create("gpt-4")
         self.assertEqual(model.max_context_tokens, 8 * 1024)
 
-        model = Model("gpt-4-32k")
+        model = Model.create("gpt-4-32k")
         self.assertEqual(model.max_context_tokens, 32 * 1024)
 
-        model = Model("gpt-4-0101")
+        model = Model.create("gpt-4-0101")
         self.assertEqual(model.max_context_tokens, 8 * 1024)
 
-        model = Model("gpt-4-32k-2123")
+        model = Model.create("gpt-4-32k-2123")
         self.assertEqual(model.max_context_tokens, 32 * 1024)
 
+    @patch('openai.Model.list')
+    def test_openrouter_model_properties(self, mock_model_list):
+        import openai
+
+        # openai.Model.list() returns an object with a .data attribute
+        mock_model_list.return_value = SimpleNamespace(
+            data=[
+                {
+                    'id': 'openai/gpt-4',
+                    'object': 'model',
+                    'context_length': '8192',
+                    'pricing': {
+                        'prompt': '0.00006',
+                        'completion': '0.00012',
+                    },
+                }
+            ]
+        )
+
+        old_base = openai.api_base
+        openai.api_base = 'https://openrouter.ai/api/v1'
+        try:
+            model = OpenRouterModel("gpt-4")
+            self.assertEqual(model.name, 'openai/gpt-4')
+            self.assertEqual(model.max_context_tokens, 8192)
+            self.assertEqual(model.prompt_price, 0.06)
+            self.assertEqual(model.completion_price, 0.12)
+        finally:
+            # restore the global api_base so other tests are unaffected
+            openai.api_base = old_base
+
 
 if __name__ == "__main__":
     unittest.main()