feat: add PDF file support and refactor image handling

2025-05-29 16:54:59 +00:00 · 2024-11-26 17:19:28 -08:00 · 2024-11-26 17:19:28 -08:00 · b8f36c8277
commit b8f36c8277
parent 73c1dc697f
3 changed files with 7 additions and 10 deletions
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -704,7 +704,6 @@ class Coder:
                dict(role="assistant", content="Ok."),
            ]
        dump(chat_files_messages)
        return chat_files_messages
    def get_images_message(self):
@@ -717,28 +716,26 @@ class Coder:
        image_messages = []
        for fname, content in self.get_abs_fnames_content():
            if is_image_file(fname):
                with open(fname, "rb") as image_file:
                    encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
                mime_type, _ = mimetypes.guess_type(fname)
                if not mime_type:
                    continue
                with open(fname, "rb") as image_file:
                    encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
                image_url = f"data:{mime_type};base64,{encoded_string}"
                rel_fname = self.get_rel_fname(fname)
                if mime_type.startswith("image/") and supports_images:
                    image_url = f"data:{mime_type};base64,{encoded_string}"
                    rel_fname = self.get_rel_fname(fname)
                    image_messages += [
                        {"type": "text", "text": f"Image file: {rel_fname}"},
                        {"type": "image_url", "image_url": {"url": image_url, "detail": "high"}},
                    ]
                elif mime_type == "application/pdf" and supports_pdfs:
                    image_url = f"data:{mime_type};base64,{encoded_string}"
                    rel_fname = self.get_rel_fname(fname)
                    image_messages += [
                        {"type": "text", "text": f"PDF file: {rel_fname}"},
                        {"type": "image_url", "image_url": image_url},
                    ]
        dump(image_messages)
        if not image_messages:
            return None
--- a/aider/models.py
+++ b/aider/models.py
@@ -17,7 +17,7 @@ from aider.dump import dump  # noqa: F401
 from aider.llm import litellm
 DEFAULT_MODEL_NAME = "gpt-4o"
-ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31"
+ANTHROPIC_BETA_HEADER = "prompt-caching-2024-07-31,pdfs-2024-09-25"
 OPENAI_MODELS = """
 gpt-4
--- a/aider/utils.py
+++ b/aider/utils.py
@@ -13,7 +13,7 @@ import git
 from aider.dump import dump  # noqa: F401
-IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}
+IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp", ".pdf"}
 class IgnorantTemporaryDirectory: