From 044617b1b7f15297c88658efa3ca6e822f02df7a Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Thu, 27 Jun 2024 14:40:46 -0700
Subject: [PATCH] continue roughly working using anthropic's prefill

---
 aider/coders/base_coder.py | 64 +++++++++++++++++++++++++-------------
 aider/sendchat.py          |  3 +-
 2 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index c5a4bf794..fed054920 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -13,7 +13,6 @@ from json.decoder import JSONDecodeError
 from pathlib import Path
 
 import git
-import openai
 from jsonschema import Draft7Validator
 from rich.console import Console, Text
 from rich.markdown import Markdown
@@ -37,7 +36,7 @@ class MissingAPIKeyError(ValueError):
     pass
 
 
-class ExhaustedContextWindow(Exception):
+class FinishReasonLength(Exception):
     pass
 
 
@@ -812,28 +811,43 @@ class Coder:
         if self.verbose:
             utils.show_messages(messages, functions=self.functions)
 
+        multi_response_content = ""
         exhausted = False
         interrupted = False
-        try:
-            yield from self.send(messages, functions=self.functions)
-        except KeyboardInterrupt:
-            interrupted = True
-        except ExhaustedContextWindow:
-            exhausted = True
-        except litellm.exceptions.BadRequestError as err:
-            if "ContextWindowExceededError" in err.message:
+        while True:
+            try:
+                yield from self.send(messages, functions=self.functions)
+                break
+            except KeyboardInterrupt:
+                interrupted = True
+                break
+            except litellm.ContextWindowExceededError as cwe_err:
+                # the input is overflowing the context window
                 exhausted = True
-            else:
-                self.io.tool_error(f"BadRequestError: {err}")
+                dump(cwe_err)
+                break
+            except litellm.exceptions.BadRequestError as br_err:
+                dump(br_err)
+                self.io.tool_error(f"BadRequestError: {br_err}")
                 return
-        except openai.BadRequestError as err:
-            if "maximum context length" in str(err):
-                exhausted = True
-            else:
-                raise err
-        except Exception as err:
-            self.io.tool_error(f"Unexpected error: {err}")
-            return
+            except FinishReasonLength as frl_err:
+                # finish_reason=length means 4k output limit?
+                dump(frl_err)
+                # exhausted = True
+
+                multi_response_content += self.partial_response_content
+                if messages[-1]["role"] == "assistant":
+                    messages[-1]["content"] = multi_response_content
+                else:
+                    messages.append(dict(role="assistant", content=multi_response_content))
+            except Exception as err:
+                self.io.tool_error(f"Unexpected error: {err}")
+                traceback.print_exc()
+                return
+
+        if multi_response_content:
+            multi_response_content += self.partial_response_content
+            self.partial_response_content = multi_response_content
 
         if exhausted:
             self.show_exhausted_error()
@@ -1103,7 +1117,7 @@ class Coder:
         if show_func_err and show_content_err:
             self.io.tool_error(show_func_err)
             self.io.tool_error(show_content_err)
-            raise Exception("No data found in openai response!")
+            raise Exception("No data found in LLM response!")
 
         tokens = None
         if hasattr(completion, "usage") and completion.usage is not None:
@@ -1131,6 +1145,12 @@ class Coder:
         if tokens is not None:
             self.io.tool_output(tokens)
 
+        if (
+            hasattr(completion.choices[0], "finish_reason")
+            and completion.choices[0].finish_reason == "length"
+        ):
+            raise FinishReasonLength()
+
     def show_send_output_stream(self, completion):
         if self.show_pretty():
             mdargs = dict(style=self.assistant_output_color, code_theme=self.code_theme)
@@ -1147,7 +1167,7 @@ class Coder:
                 hasattr(chunk.choices[0], "finish_reason")
                 and chunk.choices[0].finish_reason == "length"
             ):
-                raise ExhaustedContextWindow()
+                raise FinishReasonLength()
 
             try:
                 func = chunk.choices[0].delta.function_call
diff --git a/aider/sendchat.py b/aider/sendchat.py
index 19e91d255..8f661f598 100644
--- a/aider/sendchat.py
+++ b/aider/sendchat.py
@@ -3,7 +3,6 @@ import json
 
 import backoff
 import httpx
-import openai
 
 from aider.dump import dump  # noqa: F401
 from aider.litellm import litellm
@@ -85,5 +84,5 @@ def simple_send_with_retries(model_name, messages):
                 stream=False,
             )
             return response.choices[0].message.content
-        except (AttributeError, openai.BadRequestError):
+        except (AttributeError, litellm.exceptions.BadRequestError):
             return
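
Note (not part of the patch itself): the heart of this change is the "prefill
continue" loop in base_coder.py. When a completion stops with finish_reason ==
"length", the partial reply is stashed in a trailing assistant message and the
conversation is re-sent; providers such as Anthropic treat a trailing assistant
turn as a prefill, so the next completion resumes where the previous one was cut
off. Below is a minimal, self-contained sketch of the same idea, assuming a
litellm-style completion() API; the function name send_with_continue and the
max_loops cap are hypothetical, not part of this patch.

import litellm


def send_with_continue(model, messages, max_loops=4):
    """Re-send until the model stops for a reason other than 'length'.

    Sketch only: mirrors the multi_response_content bookkeeping in the
    patch, assuming the provider supports assistant prefill.
    """
    accumulated = ""
    for _ in range(max_loops):
        response = litellm.completion(model=model, messages=messages, stream=False)
        choice = response.choices[0]
        accumulated += choice.message.content or ""

        if getattr(choice, "finish_reason", None) != "length":
            return accumulated  # finished normally

        # Truncated output: write everything received so far into a trailing
        # assistant message so the next request continues from that point.
        if messages[-1]["role"] == "assistant":
            messages[-1]["content"] = accumulated
        else:
            messages.append(dict(role="assistant", content=accumulated))

    return accumulated  # give up after max_loops continuations

As in the patch, the accumulated text replaces the trailing assistant message
rather than being appended to it; otherwise each continuation round would
duplicate the output already received.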