feat: add PDF file support and refactor image handling

This commit is contained in:
Paul Gauthier 2024-11-26 17:19:28 -08:00 committed by Paul Gauthier (aider)
parent 73c1dc697f
commit b8f36c8277
3 changed files with 7 additions and 10 deletions

View file

@@ -704,7 +704,6 @@ class Coder:
dict(role="assistant", content="Ok."), dict(role="assistant", content="Ok."),
] ]
dump(chat_files_messages)
return chat_files_messages return chat_files_messages
def get_images_message(self): def get_images_message(self):
@@ -717,28 +716,26 @@ class Coder:
image_messages = [] image_messages = []
for fname, content in self.get_abs_fnames_content(): for fname, content in self.get_abs_fnames_content():
if is_image_file(fname): if is_image_file(fname):
with open(fname, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
mime_type, _ = mimetypes.guess_type(fname) mime_type, _ = mimetypes.guess_type(fname)
if not mime_type: if not mime_type:
continue continue
with open(fname, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
image_url = f"data:{mime_type};base64,{encoded_string}"
rel_fname = self.get_rel_fname(fname)
if mime_type.startswith("image/") and supports_images: if mime_type.startswith("image/") and supports_images:
image_url = f"data:{mime_type};base64,{encoded_string}"
rel_fname = self.get_rel_fname(fname)
image_messages += [ image_messages += [
{"type": "text", "text": f"Image file: {rel_fname}"}, {"type": "text", "text": f"Image file: {rel_fname}"},
{"type": "image_url", "image_url": {"url": image_url, "detail": "high"}}, {"type": "image_url", "image_url": {"url": image_url, "detail": "high"}},
] ]
elif mime_type == "application/pdf" and supports_pdfs: elif mime_type == "application/pdf" and supports_pdfs:
image_url = f"data:{mime_type};base64,{encoded_string}"
rel_fname = self.get_rel_fname(fname)
image_messages += [ image_messages += [
{"type": "text", "text": f"PDF file: {rel_fname}"}, {"type": "text", "text": f"PDF file: {rel_fname}"},
{"type": "image_url", "image_url": image_url}, {"type": "image_url", "image_url": image_url},
] ]
dump(image_messages)
if not image_messages: if not image_messages:
return None return None

View file

@@ -17,7 +17,7 @@ from aider.dump import dump # noqa: F401
from aider.llm import litellm from aider.llm import litellm
DEFAULT_MODEL_NAME = "gpt-4o" DEFAULT_MODEL_NAME = "gpt-4o"
ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31" ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25"
OPENAI_MODELS = """ OPENAI_MODELS = """
gpt-4 gpt-4

View file

@@ -13,7 +13,7 @@ import git
from aider.dump import dump # noqa: F401 from aider.dump import dump # noqa: F401
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"} IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp", ".pdf"}
class IgnorantTemporaryDirectory: class IgnorantTemporaryDirectory: