More flexible models

This commit is contained in:
Paul Gauthier 2023-06-14 17:51:17 -07:00
parent 8b5dcecbe2
commit 8660d35601
4 changed files with 44 additions and 50 deletions

View file

@@ -79,19 +79,20 @@ class Coder:
self.console = Console(force_terminal=True, no_color=True) self.console = Console(force_terminal=True, no_color=True)
main_model = models.get_model(main_model) main_model = models.get_model(main_model)
if main_model not in models.GPT35_models: if not main_model.is_always_available():
if not self.check_model_availability(main_model): if not self.check_model_availability(main_model):
if main_model != models.GPT4: if main_model != models.GPT4:
self.io.tool_error(f"API key does not support {main_model.name}.") self.io.tool_error(f"API key does not support {main_model.name}.")
main_model = models.GPT35_16k main_model = models.GPT35_16k
self.main_model = main_model self.main_model = main_model
if main_model in models.GPT35_models: if main_model.is_gpt35():
self.io.tool_output( self.io.tool_output(
f"Using {main_model.name} (experimental): disabling ctags/repo-maps.", f"Using {main_model.name} (experimental): disabling ctags/repo-maps.",
) )
self.gpt_prompts = prompts.GPT35() self.gpt_prompts = prompts.GPT35()
else: else:
self.io.tool_output(f"Using {main_model.name}.")
self.gpt_prompts = prompts.GPT4() self.gpt_prompts = prompts.GPT4()
self.show_diffs = show_diffs self.show_diffs = show_diffs
@@ -107,7 +108,7 @@ class Coder:
self.io.tool_output("Not using git.") self.io.tool_output("Not using git.")
self.find_common_root() self.find_common_root()
if main_model in models.GPT4_models: if main_model.is_gpt4():
rm_io = io if self.verbose else None rm_io = io if self.verbose else None
self.repo_map = RepoMap( self.repo_map = RepoMap(
map_tokens, map_tokens,
@@ -318,7 +319,7 @@ class Coder:
] ]
main_sys = self.gpt_prompts.main_system main_sys = self.gpt_prompts.main_system
if self.main_model in models.GPT4_models: if self.main_model.is_gpt4():
main_sys += "\n" + self.gpt_prompts.system_reminder main_sys += "\n" + self.gpt_prompts.system_reminder
messages = [ messages = [
@@ -347,7 +348,7 @@ class Coder:
if edit_error: if edit_error:
return edit_error return edit_error
if self.main_model in models.GPT4_models or not edited: if self.main_model.is_gpt4() or not edited:
# Don't add 3.5 assistant messages to the history if they contain "edits" # Don't add 3.5 assistant messages to the history if they contain "edits"
# Because those edits are actually fully copies of the file! # Because those edits are actually fully copies of the file!
# That wastes too much context window. # That wastes too much context window.
@@ -482,7 +483,7 @@ class Coder:
if self.pretty: if self.pretty:
show_resp = self.resp show_resp = self.resp
if self.main_model in models.GPT35_models: if self.main_model.is_gpt35():
try: try:
show_resp = self.update_files_gpt35(self.resp, mode="diff") show_resp = self.update_files_gpt35(self.resp, mode="diff")
except ValueError: except ValueError:
@@ -786,9 +787,9 @@ class Coder:
return set(self.get_all_relative_files()) - set(self.get_inchat_relative_files()) return set(self.get_all_relative_files()) - set(self.get_inchat_relative_files())
def apply_updates(self, content): def apply_updates(self, content):
if self.main_model in models.GPT4_models: if self.main_model.is_gpt4():
method = self.update_files_gpt4 method = self.update_files_gpt4
elif self.main_model in models.GPT35_models: elif self.main_model.is_gpt35():
method = self.update_files_gpt35 method = self.update_files_gpt35
else: else:
raise ValueError(f"apply_updates() doesn't support {self.main_model.name}") raise ValueError(f"apply_updates() doesn't support {self.main_model.name}")

View file

@@ -8,7 +8,7 @@ import git
import tiktoken import tiktoken
from prompt_toolkit.completion import Completion from prompt_toolkit.completion import Completion
from aider import models, prompts, utils from aider import prompts, utils
class Commands: class Commands:
@@ -183,7 +183,7 @@ class Commands:
"was reset and removed from git.\n" "was reset and removed from git.\n"
) )
if self.coder.main_model in models.GPT4_models: if self.coder.main_model.is_gpt4():
return prompts.undo_command_reply return prompts.undo_command_reply
def cmd_diff(self, args): def cmd_diff(self, args):

View file

@@ -84,7 +84,7 @@ def main(args=None, input=None, output=None):
action="store_const", action="store_const",
dest="model", dest="model",
const=models.GPT35_16k.name, const=models.GPT35_16k.name,
help=f"Use {models.GPT35.name} model for the main chat (not advised)", help=f"Use {models.GPT35_16k.name} model for the main chat (gpt-4 is better)",
) )
parser.add_argument( parser.add_argument(
"--pretty", "--pretty",

View file

@@ -1,44 +1,37 @@
class Model:
    """A named chat model together with the size of its context window."""

    def __init__(self, name, max_context_tokens):
        # Context size is supplied in units of 1024 tokens (i.e. "k").
        self.max_context_tokens = max_context_tokens * 1024
        self.name = name


# GPT-4 family
GPT4_32k = Model("gpt-4-32k", 32)
GPT4_32k_0613 = Model("gpt-4-32k-0613", 32)
GPT4 = Model("gpt-4", 8)
GPT4_models = [GPT4, GPT4_32k, GPT4_32k_0613]

# GPT-3.5 family
GPT35 = Model("gpt-3.5-turbo", 4)
GPT35_16k = Model("gpt-3.5-turbo-16k", 16)
GPT35_models = [GPT35, GPT35_16k]
import re import re
class Model:
    """A chat model, identified by name, with its context-window size.

    If ``tokens`` is not given, the window size (in units of 1024
    tokens) is inferred from a ``-<N>k`` suffix in the model name
    (e.g. ``gpt-4-32k`` -> 32), falling back to 8 — the gpt-4 base
    window.
    """

    def __init__(self, name, tokens=None):
        self.name = name
        if tokens is None:
            # Infer the size from a "-<N>k" marker in the model name.
            match = re.search(r"-([0-9]+)k", name)
            default_tokens = 8
            tokens = int(match.group(1)) if match else default_tokens
        self.max_context_tokens = tokens * 1024

    def is_gpt4(self):
        """Return True for any gpt-4 family model."""
        return self.name.startswith("gpt-4")

    def is_gpt35(self):
        """Return True for any gpt-3.5-turbo family model."""
        return self.name.startswith("gpt-3.5-turbo")

    def is_always_available(self):
        """Models usable without checking availability against the API key."""
        return self.is_gpt35()


GPT4 = Model("gpt-4", 8)
# gpt-3.5-turbo has a 4k context window; its name carries no "-<N>k"
# suffix, so the constructor's default of 8 would overstate it by 2x.
# Pass the size explicitly.
GPT35 = Model("gpt-3.5-turbo", 4)
GPT35_16k = Model("gpt-3.5-turbo-16k")
def get_model(name): def get_model(name):
models = GPT35_models + GPT4_models model = Model(name)
for model in models: if model.is_gpt4() or model.is_gpt35():
if model.name == name: return model
return model
match = re.search(r'-([0-9]+)k', name) raise ValueError(f"Unsupported model: {name}")
tokens = int(match.group(1)) if match else 0
model = Model(name, tokens)
if name.startswith("gpt-4-"):
GPT4_models.append(model)
elif name.startswith("gpt-3.5-"):
GPT35_models.append(model)
else:
raise ValueError(f"Unsupported model: {name}")
return model