support 8k output with 3.5 sonnet

This commit is contained in:
Paul Gauthier 2024-08-01 17:52:14 -03:00
parent 32d82b3175
commit 5e818c2899
3 changed files with 25 additions and 5 deletions

View file

@@ -866,7 +866,7 @@ class Coder:
self.io.tool_error(f"BadRequestError: {br_err}") self.io.tool_error(f"BadRequestError: {br_err}")
return return
except FinishReasonLength: except FinishReasonLength:
# We hit the 4k output limit! # We hit the output limit!
if not self.main_model.can_prefill: if not self.main_model.can_prefill:
exhausted = True exhausted = True
break break
@@ -1108,7 +1108,7 @@ class Coder:
def send(self, messages, model=None, functions=None): def send(self, messages, model=None, functions=None):
if not model: if not model:
model = self.main_model.name model = self.main_model
self.partial_response_content = "" self.partial_response_content = ""
self.partial_response_function_call = dict() self.partial_response_function_call = dict()
@@ -1118,7 +1118,13 @@ class Coder:
interrupted = False interrupted = False
try: try:
hash_object, completion = send_with_retries( hash_object, completion = send_with_retries(
model, messages, functions, self.stream, self.temperature model.name,
messages,
functions,
self.stream,
self.temperature,
extra_headers=model.extra_headers,
max_tokens=model.max_tokens,
) )
self.chat_completion_call_hashes.append(hash_object.hexdigest()) self.chat_completion_call_hashes.append(hash_object.hexdigest())

View file

@@ -62,7 +62,7 @@ ANTHROPIC_MODELS = [ln.strip() for ln in ANTHROPIC_MODELS.splitlines() if ln.str
class ModelSettings: class ModelSettings:
# Model class needs to have each of these as well # Model class needs to have each of these as well
name: str name: str
edit_format: str edit_format: str = "whole"
weak_model_name: Optional[str] = None weak_model_name: Optional[str] = None
use_repo_map: bool = False use_repo_map: bool = False
send_undo_reply: bool = False send_undo_reply: bool = False
@@ -71,6 +71,8 @@ class ModelSettings:
reminder_as_sys_msg: bool = False reminder_as_sys_msg: bool = False
examples_as_sys_msg: bool = False examples_as_sys_msg: bool = False
can_prefill: bool = False can_prefill: bool = False
extra_headers: Optional[dict] = None
max_tokens: Optional[int] = None
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo # https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
@@ -250,6 +252,8 @@ MODEL_SETTINGS = [
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
accepts_images=True, accepts_images=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
ModelSettings( ModelSettings(
"anthropic/claude-3-5-sonnet-20240620", "anthropic/claude-3-5-sonnet-20240620",
@@ -258,6 +262,8 @@ MODEL_SETTINGS = [
use_repo_map=True, use_repo_map=True,
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
ModelSettings( ModelSettings(
"openrouter/anthropic/claude-3.5-sonnet", "openrouter/anthropic/claude-3.5-sonnet",
@@ -267,6 +273,8 @@ MODEL_SETTINGS = [
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
accepts_images=True, accepts_images=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
# Vertex AI Claude models # Vertex AI Claude models
ModelSettings( ModelSettings(
@@ -277,6 +285,8 @@ MODEL_SETTINGS = [
examples_as_sys_msg=True, examples_as_sys_msg=True,
can_prefill=True, can_prefill=True,
accepts_images=True, accepts_images=True,
max_tokens=8192,
extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
), ),
ModelSettings( ModelSettings(
"vertex_ai/claude-3-opus@20240229", "vertex_ai/claude-3-opus@20240229",

View file

@@ -57,7 +57,9 @@ def lazy_litellm_retry_decorator(func):
@lazy_litellm_retry_decorator @lazy_litellm_retry_decorator
def send_with_retries(model_name, messages, functions, stream, temperature=0, extra_headers=None): def send_with_retries(
model_name, messages, functions, stream, temperature=0, extra_headers=None, max_tokens=None
):
from aider.llm import litellm from aider.llm import litellm
kwargs = dict( kwargs = dict(
@@ -70,6 +72,8 @@ def send_with_retries(model_name, messages, functions, stream, temperature=0, ex
kwargs["functions"] = functions kwargs["functions"] = functions
if extra_headers is not None: if extra_headers is not None:
kwargs["extra_headers"] = extra_headers kwargs["extra_headers"] = extra_headers
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
key = json.dumps(kwargs, sort_keys=True).encode() key = json.dumps(kwargs, sort_keys=True).encode()