From d8f33a81242d05b130790d0e7cc2d83f74ea5542 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Wed, 29 Nov 2023 21:20:29 +1300 Subject: [PATCH 01/16] Auto switch to gpt-4-vision-preview if image files added to context --- aider/coders/base_coder.py | 40 ++++++++++++++++++++++++++++----- aider/commands.py | 16 ++++++++++---- aider/io.py | 24 ++++++++++++++++++++ aider/models/model.py | 45 ++++++++++++++++++++++++++++++++++++++ aider/sendchat.py | 7 ++++++ aider/utils.py | 6 ++++- requirements.in | 1 + 7 files changed, 128 insertions(+), 11 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 8aa4ecba1..fc01e9a33 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -38,6 +38,8 @@ class ExhaustedContextWindow(Exception): def wrap_fence(name): return f"<{name}>", f"" +#NOTE currently duplicated in io.py +IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} class Coder: abs_fnames = None @@ -283,12 +285,13 @@ class Coder: prompt = "" for fname, content in self.get_abs_fnames_content(): - relative_fname = self.get_rel_fname(fname) - prompt += "\n" - prompt += relative_fname - prompt += f"\n{self.fence[0]}\n" - prompt += content - prompt += f"{self.fence[1]}\n" + if not any(fname.lower().endswith(ext) for ext in IMAGE_EXTENSIONS): + relative_fname = self.get_rel_fname(fname) + prompt += "\n" + prompt += relative_fname + prompt += f"\n{self.fence[0]}\n" + prompt += content + prompt += f"{self.fence[1]}\n" return prompt @@ -321,8 +324,33 @@ class Coder: dict(role="assistant", content="Ok."), ] + images_message = self.get_images_message() + if images_message is not None: + files_messages.append(images_message) + return files_messages + def get_images_message(self): + image_messages = [] + for fname, content in self.get_abs_fnames_content(): + if any(fname.lower().endswith(ext) for ext in IMAGE_EXTENSIONS): + image_url = f"data:image/{Path(fname).suffix.lstrip('.')};base64,{content}" + image_messages.append({ + "type": "image_url", + "image_url": { + "url": image_url, + "detail": "high" + } + }) + + if not image_messages: + return None + + return { + "role": "user", + "content": image_messages + } + def run(self, with_message=None): while True: try: diff --git a/aider/commands.py b/aider/commands.py index c6a534a45..81045c246 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -10,6 +10,8 @@ from aider import prompts, voice from .dump import dump # noqa: F401 +#NOTE currently duplicated in io.py and base_coder.py +IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} class Commands: voice = None @@ -138,9 +140,13 @@ class Commands: for fname in self.coder.abs_fnames: relative_fname = self.coder.get_rel_fname(fname) content = self.io.read_text(fname) - # approximate - content = f"{relative_fname}\n```\n" + content + "```\n" - tokens = self.coder.main_model.token_count(content) + if any(relative_fname.endswith(ext) for ext in IMAGE_EXTENSIONS): + # If the file is an image, use the token_count_for_image method + tokens = self.coder.main_model.token_count_for_image(fname) + else: + # approximate + content = f"{relative_fname}\n```\n" + content + "```\n" + tokens = self.coder.main_model.token_count(content) res.append((tokens, f"{relative_fname}", "use /drop to drop from chat")) self.io.tool_output("Approximate context window usage, in tokens:") @@ -167,7 +173,9 @@ class Commands: self.io.tool_output("=" * (width + cost_width + 1)) self.io.tool_output(f"${total_cost:5.2f} {fmt(total)} tokens total") - limit = self.coder.main_model.max_context_tokens + # Check if any images are in the chat and override the max context window size if so + image_in_chat = any(relative_fname.endswith(ext) for ext in IMAGE_EXTENSIONS for relative_fname in self.coder.get_inchat_relative_files()) + limit = 4096 if image_in_chat else self.coder.main_model.max_context_tokens remaining = limit - total if remaining > 1024: self.io.tool_output(f"{cost_pad}{fmt(remaining)} tokens remaining in context window") diff --git a/aider/io.py b/aider/io.py index 1da68e83b..255efbd79 100644 --- a/aider/io.py +++ b/aider/io.py @@ -1,6 +1,7 @@ import os from collections import defaultdict from datetime import datetime +import base64 from pathlib import Path from prompt_toolkit.completion import Completer, Completion @@ -17,6 +18,9 @@ from rich.text import Text from .dump import dump # noqa: F401 +#QUESTION what image extensions do we want to support? +#QUESTION where should this live? Currently duplicated in base_coder +IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} class AutoCompleter(Completer): def __init__(self, root, rel_fnames, addable_rel_fnames, commands, encoding): @@ -139,7 +143,27 @@ class InputOutput: current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") self.append_chat_history(f"\n# aider chat started at {current_time}\n\n") + + def read_image(self, filename): + try: + with open(str(filename), "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()) + return encoded_string.decode('utf-8') + except FileNotFoundError: + self.tool_error(f"{filename}: file not found error") + return + except IsADirectoryError: + self.tool_error(f"{filename}: is a directory") + return + except Exception as e: + self.tool_error(f"{filename}: {e}") + return + def read_text(self, filename): + file_extension = Path(filename).suffix.lower() + if file_extension in IMAGE_EXTENSIONS: + return self.read_image(filename) + try: with open(str(filename), "r", encoding=self.encoding) as f: return f.read() diff --git a/aider/models/model.py b/aider/models/model.py index 9b1a3daab..3b394efb8 100644 --- a/aider/models/model.py +++ b/aider/models/model.py @@ -1,6 +1,8 @@ import json +import math import openai +from PIL import Image class Model: @@ -51,3 +53,46 @@ class Model: msgs = json.dumps(messages) return len(self.tokenizer.encode(msgs)) + + def token_count_for_image(self, fname): + """ + Calculate the token cost for an image assuming high detail. + The token cost is determined by the size of the image. + :param fname: The filename of the image. + :return: The token cost for the image. + """ + # Placeholder for image size retrieval logic + # TODO: Implement the logic to retrieve the image size from the file + width, height = self.get_image_size(fname) + + # If the image is larger than 2048 in any dimension, scale it down to fit within 2048x2048 + max_dimension = max(width, height) + if max_dimension > 2048: + scale_factor = 2048 / max_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Scale the image such that the shortest side is 768 pixels long + min_dimension = min(width, height) + scale_factor = 768 / min_dimension + width = int(width * scale_factor) + height = int(height * scale_factor) + + # Calculate the number of 512x512 tiles needed to cover the image + tiles_width = math.ceil(width / 512) + tiles_height = math.ceil(height / 512) + num_tiles = tiles_width * tiles_height + + # Each tile costs 170 tokens, and there's an additional fixed cost of 85 tokens + token_cost = num_tiles * 170 + 85 + return token_cost + + + def get_image_size(self, fname): + """ + Retrieve the size of an image. + :param fname: The filename of the image. + :return: A tuple (width, height) representing the image size in pixels. + """ + with Image.open(fname) as img: + return img.size diff --git a/aider/sendchat.py b/aider/sendchat.py index 7c2994dcc..fb190f85c 100644 --- a/aider/sendchat.py +++ b/aider/sendchat.py @@ -53,6 +53,13 @@ def send_with_retries(model_name, messages, functions, stream): if "openrouter.ai" in openai.api_base: kwargs["headers"] = {"HTTP-Referer": "http://aider.chat", "X-Title": "Aider"} + # Check conditions to switch to gpt-4-vision-preview + if "openrouter.ai" not in openai.api_base and model_name.startswith("gpt-4"): + if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): + kwargs['model'] = "gpt-4-vision-preview" + # looks like gpt-4-vision is limited to max tokens of 4096 + kwargs["max_tokens"] = 4096 + key = json.dumps(kwargs, sort_keys=True).encode() # Generate SHA1 hash of kwargs and append it to chat_completion_call_hashes diff --git a/aider/utils.py b/aider/utils.py index 5147314cc..98d6a27b3 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -16,7 +16,11 @@ def show_messages(messages, title=None, functions=None): for msg in messages: role = msg["role"].upper() content = msg.get("content") - if content: + if isinstance(content, list): # Handle list content (e.g., image messages) + for item in content: + if isinstance(item, dict) and "image_url" in item: + print(role, "Image URL:", item["image_url"]["url"]) + elif isinstance(content, str): # Handle string content for line in content.splitlines(): print(role, line) content = msg.get("function_call") diff --git a/requirements.in b/requirements.in index d7fdf182a..fe07f3dae 100644 --- a/requirements.in +++ b/requirements.in @@ -16,3 +16,4 @@ packaging sounddevice soundfile PyYAML +Pillow From 58ba202b51e9cf2889cb578c2f50312794d35e41 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Wed, 29 Nov 2023 22:57:31 +1300 Subject: [PATCH 02/16] compiling requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index e074caea7..98bb77582 100644 --- a/requirements.txt +++ b/requirements.txt @@ -71,6 +71,8 @@ pathspec==0.11.2 # via # -r requirements.in # grep-ast +pillow==10.1.0 + # via -r requirements.in prompt-toolkit==3.0.39 # via -r requirements.in pycparser==2.21 From dc6aeb638d5fe3e9bffac11e57b4f358b00366eb Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Wed, 29 Nov 2023 23:26:02 +1300 Subject: [PATCH 03/16] gpt-4-vision-preiview can receive 128k tokens --- aider/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/commands.py b/aider/commands.py index 81045c246..0ccc1f273 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -175,7 +175,7 @@ class Commands: # Check if any images are in the chat and override the max context window size if so image_in_chat = any(relative_fname.endswith(ext) for ext in IMAGE_EXTENSIONS for relative_fname in self.coder.get_inchat_relative_files()) - limit = 4096 if image_in_chat else self.coder.main_model.max_context_tokens + limit = 128000 if image_in_chat else self.coder.main_model.max_context_tokens remaining = limit - total if remaining > 1024: self.io.tool_output(f"{cost_pad}{fmt(remaining)} tokens remaining in context window") From 91bbb0a02cb932aeb59569bc0dc4d3754f1229e4 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Thu, 30 Nov 2023 11:09:56 +1300 Subject: [PATCH 04/16] TODOs for making image code robust --- aider/coders/base_coder.py | 2 ++ aider/commands.py | 1 + aider/retrievers/fleet.py | 0 3 files changed, 3 insertions(+) create mode 100644 aider/retrievers/fleet.py diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index fc01e9a33..e15156adf 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -418,6 +418,7 @@ class Coder: self.done_messages += self.cur_messages self.summarize_start() + #TODO check for impact on image messages if message: self.done_messages += [ dict(role="user", content=message), @@ -464,6 +465,7 @@ class Coder: dict(role="system", content=self.fmt_system_prompt(self.gpt_prompts.system_reminder)), ] + #TODO review impact of token count on image messages messages_tokens = self.main_model.token_count(messages) reminder_tokens = self.main_model.token_count(reminder_message) cur_tokens = self.main_model.token_count(self.cur_messages) diff --git a/aider/commands.py b/aider/commands.py index 0ccc1f273..a868d2175 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -327,6 +327,7 @@ class Commands: if abs_file_path in self.coder.abs_fnames: self.io.tool_error(f"{matched_file} is already in the chat") else: + #TODO put in guard to stop images being added to non openai / gpt-4 content = self.io.read_text(abs_file_path) if content is None: self.io.tool_error(f"Unable to read {matched_file}") diff --git a/aider/retrievers/fleet.py b/aider/retrievers/fleet.py new file mode 100644 index 000000000..e69de29bb From b7d90197a9fc3fb6fc807b985c4f2a24da16c983 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 20:50:02 +1300 Subject: [PATCH 05/16] update openrouter model to work with new openai client --- aider/models/openrouter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aider/models/openrouter.py b/aider/models/openrouter.py index 6c9eec21e..6271e703f 100644 --- a/aider/models/openrouter.py +++ b/aider/models/openrouter.py @@ -21,13 +21,13 @@ class OpenRouterModel(Model): if cached_model_details is None: cached_model_details = client.models.list().data found = next( - (details for details in cached_model_details if details.get("id") == name), None + (details for details in cached_model_details if details.id == name), None ) if found: - self.max_context_tokens = int(found.get("context_length")) - self.prompt_price = round(float(found.get("pricing").get("prompt")) * 1000, 6) - self.completion_price = round(float(found.get("pricing").get("completion")) * 1000, 6) + self.max_context_tokens = int(found.context_length) + self.prompt_price = round(float(found.pricing.get("prompt")) * 1000, 6) + self.completion_price = round(float(found.pricing.get("completion")) * 1000, 6) else: raise ValueError(f"invalid openrouter model: {name}") From 3d8599617d079e913376b592949697c6f17923b0 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 20:56:20 +1300 Subject: [PATCH 06/16] Switch to gpt-4-vision-preview if baseurl.host includes api.openai.com/ and gpt-4, otherwise strip out any image_url messages. --- aider/sendchat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aider/sendchat.py b/aider/sendchat.py index bca953856..a2a50fac2 100644 --- a/aider/sendchat.py +++ b/aider/sendchat.py @@ -42,6 +42,7 @@ def send_with_retries(client, model_name, messages, functions, stream): kwargs["functions"] = functions # Check conditions to switch to gpt-4-vision-preview + # TODO if baseurl.host does include api.openai.com/ and gpt-4 then switch the models, if it doesn't then strip out any image_url messages if client and client.base_url.host != "openrouter.ai" and model_name.startswith("gpt-4"): if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): kwargs['model'] = "gpt-4-vision-preview" From d0255ce2aed98d7a72102627a6779c3034e32e73 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 20:56:23 +1300 Subject: [PATCH 07/16] better logic for image handling --- aider/sendchat.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/aider/sendchat.py b/aider/sendchat.py index a2a50fac2..b1496488c 100644 --- a/aider/sendchat.py +++ b/aider/sendchat.py @@ -41,13 +41,19 @@ def send_with_retries(client, model_name, messages, functions, stream): if functions is not None: kwargs["functions"] = functions - # Check conditions to switch to gpt-4-vision-preview - # TODO if baseurl.host does include api.openai.com/ and gpt-4 then switch the models, if it doesn't then strip out any image_url messages - if client and client.base_url.host != "openrouter.ai" and model_name.startswith("gpt-4"): - if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): - kwargs['model'] = "gpt-4-vision-preview" - # looks like gpt-4-vision is limited to max tokens of 4096 - kwargs["max_tokens"] = 4096 + # Check conditions to switch to gpt-4-vision-preview or strip out image_url messages + if client and model_name.startswith("gpt-4"): + if client.base_url.host != "api.openai.com": + if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): + kwargs['model'] = "gpt-4-vision-preview" + # gpt-4-vision is limited to max tokens of 4096 + kwargs["max_tokens"] = 4096 + else: + # Strip out any image_url messages if not using gpt-4-vision-preview + messages = [ + {k: v for k, v in msg.items() if k != "content" or not any(isinstance(item, dict) and "image_url" in item for item in v)} + for msg in messages if isinstance(msg.get("content"), list) + ] + [msg for msg in messages if not isinstance(msg.get("content"), list)] key = json.dumps(kwargs, sort_keys=True).encode() From 90fb538015a73aededffd1760815682ae3d1b637 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 21:03:30 +1300 Subject: [PATCH 08/16] fix logic for image switching --- aider/sendchat.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/aider/sendchat.py b/aider/sendchat.py index b1496488c..d5e62d97f 100644 --- a/aider/sendchat.py +++ b/aider/sendchat.py @@ -42,18 +42,19 @@ def send_with_retries(client, model_name, messages, functions, stream): kwargs["functions"] = functions # Check conditions to switch to gpt-4-vision-preview or strip out image_url messages - if client and model_name.startswith("gpt-4"): - if client.base_url.host != "api.openai.com": - if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): - kwargs['model'] = "gpt-4-vision-preview" - # gpt-4-vision is limited to max tokens of 4096 - kwargs["max_tokens"] = 4096 - else: - # Strip out any image_url messages if not using gpt-4-vision-preview - messages = [ - {k: v for k, v in msg.items() if k != "content" or not any(isinstance(item, dict) and "image_url" in item for item in v)} - for msg in messages if isinstance(msg.get("content"), list) - ] + [msg for msg in messages if not isinstance(msg.get("content"), list)] + if client and model_name.startswith("gpt-4") and "api.openai.com" in client.base_url.host: + print('switch model') + if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): + kwargs['model'] = "gpt-4-vision-preview" + # gpt-4-vision is limited to max tokens of 4096 + kwargs["max_tokens"] = 4096 + else: + # Strip out any image_url messages if not using gpt-4-vision-preview + print('strip img') + messages = [ + {k: v for k, v in msg.items() if k != "content" or not any(isinstance(item, dict) and "image_url" in item for item in v)} + for msg in messages if isinstance(msg.get("content"), list) + ] + [msg for msg in messages if not isinstance(msg.get("content"), list)] key = json.dumps(kwargs, sort_keys=True).encode() From c919f9f0c6816fd87deb05c1d1cd927e7cf22b58 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 21:13:07 +1300 Subject: [PATCH 09/16] handle switching to gpt4-vision-preview --- aider/commands.py | 5 ++--- aider/models/openrouter.py | 1 - aider/sendchat.py | 8 -------- aider/utils.py | 12 ++++++++++++ 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/aider/commands.py b/aider/commands.py index 3bb75ee43..30960c1d8 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -10,8 +10,7 @@ from aider import prompts, voice from .dump import dump # noqa: F401 -#NOTE currently duplicated in io.py and base_coder.py -IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} +from aider.utils import is_image_file class Commands: voice = None @@ -140,7 +139,7 @@ class Commands: for fname in self.coder.abs_fnames: relative_fname = self.coder.get_rel_fname(fname) content = self.io.read_text(fname) - if any(relative_fname.endswith(ext) for ext in IMAGE_EXTENSIONS): + if is_image_file(relative_fname): # If the file is an image, use the token_count_for_image method tokens = self.coder.main_model.token_count_for_image(fname) else: diff --git a/aider/models/openrouter.py b/aider/models/openrouter.py index 6271e703f..a7d0be82d 100644 --- a/aider/models/openrouter.py +++ b/aider/models/openrouter.py @@ -33,7 +33,6 @@ class OpenRouterModel(Model): raise ValueError(f"invalid openrouter model: {name}") -# TODO run benchmarks and figure out which models support which edit-formats def edit_format_for_model(name): if any(str in name for str in ["gpt-4", "claude-2"]): return "diff" diff --git a/aider/sendchat.py b/aider/sendchat.py index d5e62d97f..d8ac92625 100644 --- a/aider/sendchat.py +++ b/aider/sendchat.py @@ -43,18 +43,10 @@ def send_with_retries(client, model_name, messages, functions, stream): # Check conditions to switch to gpt-4-vision-preview or strip out image_url messages if client and model_name.startswith("gpt-4") and "api.openai.com" in client.base_url.host: - print('switch model') if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): kwargs['model'] = "gpt-4-vision-preview" # gpt-4-vision is limited to max tokens of 4096 kwargs["max_tokens"] = 4096 - else: - # Strip out any image_url messages if not using gpt-4-vision-preview - print('strip img') - messages = [ - {k: v for k, v in msg.items() if k != "content" or not any(isinstance(item, dict) and "image_url" in item for item in v)} - for msg in messages if isinstance(msg.get("content"), list) - ] + [msg for msg in messages if not isinstance(msg.get("content"), list)] key = json.dumps(kwargs, sort_keys=True).encode() diff --git a/aider/utils.py b/aider/utils.py index 98d6a27b3..0753ad4ea 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -1,7 +1,19 @@ from pathlib import Path +# Set of image file extensions +IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} + from .dump import dump # noqa: F401 +def is_image_file(file_name): + """ + Check if the given file name has an image file extension. + + :param file_name: The name of the file to check. + :return: True if the file is an image, False otherwise. + """ + return any(file_name.endswith(ext) for ext in IMAGE_EXTENSIONS) + def safe_abs_path(res): "Gives an abs path, which safely returns a full (not 8.3) windows path" From f9ba8e7b41ac697d2fefcee5c9a140f715cba957 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 21:53:53 +1300 Subject: [PATCH 10/16] Remove unnecessary comment and method call in Commands class. --- aider/coders/base_coder.py | 10 ++++++---- aider/commands.py | 7 ++++--- aider/io.py | 7 ++----- aider/sendchat.py | 4 +++- aider/utils.py | 10 ++++++++++ 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 6e99a1839..6c6dc2c19 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -24,6 +24,7 @@ from aider.repo import GitRepo from aider.repomap import RepoMap from aider.sendchat import send_with_retries +from aider.utils import is_image_file from ..dump import dump # noqa: F401 @@ -38,8 +39,6 @@ class ExhaustedContextWindow(Exception): def wrap_fence(name): return f"<{name}>", f"" -#NOTE currently duplicated in io.py -IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} class Coder: client = None @@ -294,7 +293,7 @@ class Coder: prompt = "" for fname, content in self.get_abs_fnames_content(): - if not any(fname.lower().endswith(ext) for ext in IMAGE_EXTENSIONS): + if not is_image_file(fname): relative_fname = self.get_rel_fname(fname) prompt += "\n" prompt += relative_fname @@ -341,9 +340,12 @@ class Coder: return files_messages def get_images_message(self): + if not utils.is_gpt4_with_openai_base_url(self.main_model.name, self.client): + return None + image_messages = [] for fname, content in self.get_abs_fnames_content(): - if any(fname.lower().endswith(ext) for ext in IMAGE_EXTENSIONS): + if is_image_file(fname): image_url = f"data:image/{Path(fname).suffix.lstrip('.')};base64,{content}" image_messages.append({ "type": "image_url", diff --git a/aider/commands.py b/aider/commands.py index 30960c1d8..ef9e83f3c 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -140,7 +140,6 @@ class Commands: relative_fname = self.coder.get_rel_fname(fname) content = self.io.read_text(fname) if is_image_file(relative_fname): - # If the file is an image, use the token_count_for_image method tokens = self.coder.main_model.token_count_for_image(fname) else: # approximate @@ -172,8 +171,10 @@ class Commands: self.io.tool_output("=" * (width + cost_width + 1)) self.io.tool_output(f"${total_cost:5.2f} {fmt(total)} tokens total") - # Check if any images are in the chat and override the max context window size if so - image_in_chat = any(relative_fname.endswith(ext) for ext in IMAGE_EXTENSIONS for relative_fname in self.coder.get_inchat_relative_files()) + # Set image_in_chat to False unless is_gpt4_with_openai_base_url returns True + image_in_chat = False + if utils.is_gpt4_with_openai_base_url(self.coder.main_model.name, self.coder.client): + image_in_chat = any(is_image_file(relative_fname) for relative_fname in self.coder.get_inchat_relative_files()) limit = 128000 if image_in_chat else self.coder.main_model.max_context_tokens remaining = limit - total if remaining > 1024: diff --git a/aider/io.py b/aider/io.py index 255efbd79..410d72e20 100644 --- a/aider/io.py +++ b/aider/io.py @@ -16,11 +16,9 @@ from pygments.util import ClassNotFound from rich.console import Console from rich.text import Text +from .utils import is_image_file from .dump import dump # noqa: F401 -#QUESTION what image extensions do we want to support? -#QUESTION where should this live? Currently duplicated in base_coder -IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} class AutoCompleter(Completer): def __init__(self, root, rel_fnames, addable_rel_fnames, commands, encoding): @@ -160,8 +158,7 @@ class InputOutput: return def read_text(self, filename): - file_extension = Path(filename).suffix.lower() - if file_extension in IMAGE_EXTENSIONS: + if is_image_file(filename): return self.read_image(filename) try: diff --git a/aider/sendchat.py b/aider/sendchat.py index d8ac92625..18956b83c 100644 --- a/aider/sendchat.py +++ b/aider/sendchat.py @@ -41,8 +41,10 @@ def send_with_retries(client, model_name, messages, functions, stream): if functions is not None: kwargs["functions"] = functions + from aider.utils import is_gpt4_with_openai_base_url + # Check conditions to switch to gpt-4-vision-preview or strip out image_url messages - if client and model_name.startswith("gpt-4") and "api.openai.com" in client.base_url.host: + if client and is_gpt4_with_openai_base_url(model_name, client): if any(isinstance(msg.get("content"), list) and any("image_url" in item for item in msg.get("content") if isinstance(item, dict)) for msg in messages): kwargs['model'] = "gpt-4-vision-preview" # gpt-4-vision is limited to max tokens of 4096 diff --git a/aider/utils.py b/aider/utils.py index 0753ad4ea..2b02f7bdb 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -1,4 +1,5 @@ from pathlib import Path +from openai import OpenAIError # Set of image file extensions IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} @@ -41,3 +42,12 @@ def show_messages(messages, title=None, functions=None): if functions: dump(functions) +def is_gpt4_with_openai_base_url(model_name, client): + """ + Check if the model_name starts with 'gpt-4' and the client base URL includes 'api.openai.com'. + + :param model_name: The name of the model to check. + :param client: The OpenAI client instance. + :return: True if conditions are met, False otherwise. + """ + return model_name.startswith("gpt-4") and "api.openai.com" in client.base_url.host From 9ceaf97f08b6e71466ad703c7b31e95486133734 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 22:21:24 +1300 Subject: [PATCH 11/16] making image code more robust --- aider/commands.py | 11 +++++++---- aider/sendchat.py | 2 +- aider/utils.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/aider/commands.py b/aider/commands.py index ef9e83f3c..d58198cd1 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -10,7 +10,7 @@ from aider import prompts, voice from .dump import dump # noqa: F401 -from aider.utils import is_image_file +from aider.utils import is_image_file, is_gpt4_with_openai_base_url class Commands: voice = None @@ -171,11 +171,12 @@ class Commands: self.io.tool_output("=" * (width + cost_width + 1)) self.io.tool_output(f"${total_cost:5.2f} {fmt(total)} tokens total") - # Set image_in_chat to False unless is_gpt4_with_openai_base_url returns True + # only switch to image model token count if gpt4 and openai and image in files image_in_chat = False - if utils.is_gpt4_with_openai_base_url(self.coder.main_model.name, self.coder.client): + if is_gpt4_with_openai_base_url(self.coder.main_model.name, self.coder.client): image_in_chat = any(is_image_file(relative_fname) for relative_fname in self.coder.get_inchat_relative_files()) limit = 128000 if image_in_chat else self.coder.main_model.max_context_tokens + remaining = limit - total if remaining > 1024: self.io.tool_output(f"{cost_pad}{fmt(remaining)} tokens remaining in context window") @@ -327,7 +328,9 @@ class Commands: if abs_file_path in self.coder.abs_fnames: self.io.tool_error(f"{matched_file} is already in the chat") else: - #TODO put in guard to stop images being added to non openai / gpt-4 + if is_image_file(matched_file) and not is_gpt4_with_openai_base_url(self.coder.main_model.name, self.coder.client): + self.io.tool_error(f"Cannot add image file {matched_file} as the model does not support image files") + continue content = self.io.read_text(abs_file_path) if content is None: self.io.tool_error(f"Unable to read {matched_file}") diff --git a/aider/sendchat.py b/aider/sendchat.py index 18956b83c..64aa9c7b7 100644 --- a/aider/sendchat.py +++ b/aider/sendchat.py @@ -8,6 +8,7 @@ import openai # from diskcache import Cache from openai import APIConnectionError, InternalServerError, RateLimitError +from aider.utils import is_gpt4_with_openai_base_url from aider.dump import dump # noqa: F401 CACHE_PATH = "~/.aider.send.cache.v1" @@ -41,7 +42,6 @@ def send_with_retries(client, model_name, messages, functions, stream): if functions is not None: kwargs["functions"] = functions - from aider.utils import is_gpt4_with_openai_base_url # Check conditions to switch to gpt-4-vision-preview or strip out image_url messages if client and is_gpt4_with_openai_base_url(model_name, client): diff --git a/aider/utils.py b/aider/utils.py index 2b02f7bdb..c2053e818 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -1,5 +1,4 @@ from pathlib import Path -from openai import OpenAIError # Set of image file extensions IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'} @@ -42,6 +41,7 @@ def show_messages(messages, title=None, functions=None): if functions: dump(functions) + def is_gpt4_with_openai_base_url(model_name, client): """ Check if the model_name starts with 'gpt-4' and the client base URL includes 'api.openai.com'. From 90d507170949972822005f6c1c1cad8e8e052e85 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 22:37:23 +1300 Subject: [PATCH 12/16] fix failing tests --- aider/utils.py | 3 +++ tests/test_models.py | 26 +++++++++++++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/aider/utils.py b/aider/utils.py index c2053e818..c9f41363f 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -12,6 +12,7 @@ def is_image_file(file_name): :param file_name: The name of the file to check. :return: True if the file is an image, False otherwise. """ + file_name = str(file_name) # Convert file_name to string return any(file_name.endswith(ext) for ext in IMAGE_EXTENSIONS) @@ -50,4 +51,6 @@ def is_gpt4_with_openai_base_url(model_name, client): :param client: The OpenAI client instance. :return: True if conditions are met, False otherwise. """ + if client is None or not hasattr(client, 'base_url'): + return False return model_name.startswith("gpt-4") and "api.openai.com" in client.base_url.host diff --git a/tests/test_models.py b/tests/test_models.py index fe8b681dc..6b2dc58ce 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -29,19 +29,19 @@ class TestModels(unittest.TestCase): def test_openrouter_model_properties(self): client = MagicMock() - client.models.list.return_value = { - "data": [ - { - "id": "openai/gpt-4", - "object": "model", - "context_length": "8192", - "pricing": {"prompt": "0.00006", "completion": "0.00012"}, - } - ] - } - client.models.list.return_value = type( - "", (), {"data": client.models.list.return_value["data"]} - )() + class ModelData: + def __init__(self, id, object, context_length, pricing): + self.id = id + self.object = object + self.context_length = context_length + self.pricing = pricing + + model_data = ModelData("openai/gpt-4", "model", "8192", {"prompt": "0.00006", "completion": "0.00012"}) + class ModelList: + def __init__(self, data): + self.data = data + + client.models.list.return_value = ModelList([model_data]) model = OpenRouterModel(client, "gpt-4") self.assertEqual(model.name, "openai/gpt-4") From 9aa7beff7251f5873e7f752a41d3d810f7938d6b Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Mon, 11 Dec 2023 22:42:12 +1300 Subject: [PATCH 13/16] add feature for adding an image to context --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1f7910e8f..1f0b7ea63 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,7 @@ You can find more chat transcripts on the [examples page](https://aider.chat/exa * You can use aider with multiple source files at once, so GPT can make coordinated code changes across all of them in a single changeset/commit. * Aider can [give *GPT-4* a map of your entire git repo](https://aider.chat/docs/repomap.html), which helps it understand and modify large codebases. * You can also edit files by hand using your editor while chatting with aider. Aider will notice these out-of-band edits and keep GPT up to date with the latest versions of your files. This lets you bounce back and forth between the aider chat and your editor, to collaboratively code with GPT. +* If you are using gpt-4 through openai directly, you can add image files to your context which will automatically switch you to the gpt-4-vision-preview model ## Usage From 48c680ffb6a9677e53d7b374f133823aca7b4ac8 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Tue, 19 Dec 2023 23:31:37 +1300 Subject: [PATCH 14/16] fix for openrouter which doesn't return completion prices --- aider/coders/base_coder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 0bee6d5cd..002aa8517 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -693,7 +693,7 @@ class Coder: raise Exception("No data found in openai response!") tokens = None - if hasattr(completion, "usage"): + if hasattr(completion, "usage") and completion.usage is not None: prompt_tokens = completion.usage.prompt_tokens completion_tokens = completion.usage.completion_tokens From d4e663f7bc60135749da0381a245420d4d49c991 Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Wed, 20 Dec 2023 10:13:28 +1300 Subject: [PATCH 15/16] benchmark work with openrouter --- benchmark/benchmark.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 1a22c0f62..e61cf038a 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -782,7 +782,22 @@ def run_test( chat_history_file=history_fname, ) - main_model = models.Model.create(model_name) + # Check if AIDER_DOCKER environment variable is set and create client accordingly + if "OPENAI_API_BASE" in os.environ and "openrouter.ai" in os.environ["OPENAI_API_BASE"]: + client = openai.OpenAI( + api_key=os.environ["OPENAI_API_KEY"], + base_url=os.environ.get("OPENAI_API_BASE", "https://api.openai.com"), + default_headers={ + "HTTP-Referer": "http://aider.chat", + "X-Title": "Aider", + } + ) + else: + client = openai.OpenAI( + api_key=os.environ["OPENAI_API_KEY"], + ) + + main_model = models.Model.create(model_name, client) edit_format = edit_format or main_model.edit_format dump(main_model) @@ -790,8 +805,6 @@ def run_test( show_fnames = ",".join(map(str, fnames)) print("fnames:", show_fnames) - client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"]) - coder = Coder.create( main_model, edit_format, From 93f32d385576aff0b75917e55779f4eaf4b2fcac Mon Sep 17 00:00:00 2001 From: Joshua Vial Date: Thu, 21 Dec 2023 09:36:06 +1300 Subject: [PATCH 16/16] make benchmark listen to openai_api_base env var --- benchmark/benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 12979cda8..d091e813e 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -787,11 +787,10 @@ def run_test( chat_history_file=history_fname, ) - # Check if AIDER_DOCKER environment variable is set and create client accordingly if "OPENAI_API_BASE" in os.environ and "openrouter.ai" in os.environ["OPENAI_API_BASE"]: client = openai.OpenAI( api_key=os.environ["OPENAI_API_KEY"], - base_url=os.environ.get("OPENAI_API_BASE", "https://api.openai.com"), + base_url=os.environ.get("OPENAI_API_BASE"), default_headers={ "HTTP-Referer": "http://aider.chat", "X-Title": "Aider", @@ -800,6 +799,7 @@ def run_test( else: client = openai.OpenAI( api_key=os.environ["OPENAI_API_KEY"], + base_url=os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1"), ) main_model = models.Model.create(model_name, client)