support 8k output with 3.5 sonnet

This commit is contained in:
Paul Gauthier 2024-08-01 17:52:14 -03:00
parent 32d82b3175
commit 5e818c2899
3 changed files with 25 additions and 5 deletions

View file

@@ -866,7 +866,7 @@ class Coder:
self.io.tool_error(f"BadRequestError: {br_err}") self.io.tool_error(f"BadRequestError: {br_err}")
return return
except FinishReasonLength: except FinishReasonLength:
# We hit the 4k output limit! # We hit the output limit!
if not self.main_model.can_prefill: if not self.main_model.can_prefill:
exhausted = True exhausted = True
break break
@@ -1108,7 +1108,7 @@ class Coder:
def send(self, messages, model=None, functions=None): def send(self, messages, model=None, functions=None):
if not model: if not model:
model = self.main_model.name model = self.main_model
self.partial_response_content = "" self.partial_response_content = ""
self.partial_response_function_call = dict() self.partial_response_function_call = dict()
@@ -1118,7 +1118,13 @@ class Coder:
interrupted = False interrupted = False
try: try:
hash_object, completion = send_with_retries( hash_object, completion = send_with_retries(
model, messages, functions, self.stream, self.temperature model.name,
messages,
functions,
self.stream,
self.temperature,
extra_headers=model.extra_headers,
max_tokens=model.max_tokens,
) )
self.chat_completion_call_hashes.append(hash_object.hexdigest()) self.chat_completion_call_hashes.append(hash_object.hexdigest())

View file

@@ -62,7 +62,7 @@ ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.str
class ModelSettings: class ModelSettings:
# Model class needs to have each of these as well # Model class needs to have each of these as well
name: str name: str
edit_format: str edit_format: str = "whole"
weak_model_name: Optional[str] = None weak_model_name: Optional[str] = None
use_repo_map: bool = False use_repo_map: bool = False
send_undo_reply: bool = False send_undo_reply: bool = False
@@ -71,6 +71,8 @@ class ModelSettings:
reminder_as_sys_msg: bool = False reminder_as_sys_msg: bool = False
examples_as_sys_msg: bool = False examples_as_sys_msg: bool = False
can_prefill: bool = False can_prefill: bool = False
extra_headers: Optional[dict] = None
max_tokens: Optional[int] = None
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo # https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
@@ -250,6 +252,8 @@ MODEL_SETTINGS = [
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
accepts_images=True, accepts_images=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
ModelSettings( ModelSettings(
"anthropic/claude-3-5-sonnet-20240620", "anthropic/claude-3-5-sonnet-20240620",
@@ -258,6 +262,8 @@ MODEL_SETTINGS = [
use_repo_map=True, use_repo_map=True,
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
ModelSettings( ModelSettings(
"openrouter/anthropic/claude-3.5-sonnet", "openrouter/anthropic/claude-3.5-sonnet",
@@ -267,6 +273,8 @@ MODEL_SETTINGS = [
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
accepts_images=True, accepts_images=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
# Vertex AI Claude models # Vertex AI Claude models
ModelSettings( ModelSettings(
@@ -277,6 +285,8 @@ MODEL_SETTINGS = [
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
accepts_images=True, accepts_images=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
ModelSettings( ModelSettings(
"vertex_ai/claude-3-opus@20240229", "vertex_ai/claude-3-opus@20240229",

View file

@@ -57,7 +57,9 @@ def lazy_litellm_retry_decorator(func):
@lazy_litellm_retry_decorator @lazy_litellm_retry_decorator
def send_with_retries(model_name, messages, functions, stream, temperature=0, extra_headers=None): def send_with_retries(
model_name, messages, functions, stream, temperature=0, extra_headers=None, max_tokens=None
):
from aider.llm import litellm from aider.llm import litellm
kwargs = dict( kwargs = dict(
@@ -70,6 +72,8 @@ def send_with_retries(model_name, messages, functions, stream, temperature=0, ex
kwargs["functions"] = functions kwargs["functions"] = functions
if extra_headers is not None: if extra_headers is not None:
kwargs["extra_headers"] = extra_headers kwargs["extra_headers"] = extra_headers
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
key = json.dumps(kwargs, sort_keys=True).encode() key = json.dumps(kwargs, sort_keys=True).encode()