From 9f39c8db44486531a438ae436eb30bc68918a460 Mon Sep 17 00:00:00 2001
From: Paul Gauthier <aider@paulg.org>
Date: Thu, 4 Jul 2024 14:29:19 -0300
Subject: [PATCH] added HelpCoder

---
 aider/coders/__init__.py     |   2 +
 aider/coders/base_coder.py   |   3 +
 aider/coders/help_coder.py   |  17 ++++
 aider/coders/help_prompts.py |  31 +++++++
 aider/commands.py            |  37 ++++++--
 aider/help.py                | 175 +++++++++++++----------------------
 requirements.in              |   4 +-
 7 files changed, 150 insertions(+), 119 deletions(-)
 create mode 100644 aider/coders/help_coder.py
 create mode 100644 aider/coders/help_prompts.py

diff --git a/aider/coders/__init__.py b/aider/coders/__init__.py
index 19d12b0d7..08126bae5 100644
--- a/aider/coders/__init__.py
+++ b/aider/coders/__init__.py
@@ -2,6 +2,7 @@ from .base_coder import Coder
 from .editblock_coder import EditBlockCoder
 from .editblock_fenced_coder import EditBlockFencedCoder
 from .editblock_func_coder import EditBlockFunctionCoder
+from .help_coder import HelpCoder
 from .single_wholefile_func_coder import SingleWholeFileFunctionCoder
 from .udiff_coder import UnifiedDiffCoder
 from .wholefile_coder import WholeFileCoder
@@ -16,4 +17,5 @@ __all__ = [
     EditBlockFunctionCoder,
     SingleWholeFileFunctionCoder,
     UnifiedDiffCoder,
+    HelpCoder,
 ]
diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index 662bd7912..a25bdab0f 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -80,6 +80,7 @@ class Coder:
         from . import (
             EditBlockCoder,
             EditBlockFencedCoder,
+            HelpCoder,
             UnifiedDiffCoder,
             WholeFileCoder,
         )
@@ -130,6 +131,8 @@ class Coder:
             res = WholeFileCoder(main_model, io, **kwargs)
         elif edit_format == "udiff":
             res = UnifiedDiffCoder(main_model, io, **kwargs)
+        elif edit_format == "help":
+            res = HelpCoder(main_model, io, **kwargs)
         else:
             raise ValueError(f"Unknown edit format {edit_format}")
 
diff --git a/aider/coders/help_coder.py b/aider/coders/help_coder.py
new file mode 100644
index 000000000..1225b7b97
--- /dev/null
+++ b/aider/coders/help_coder.py
@@ -0,0 +1,17 @@
+from ..dump import dump  # noqa: F401
+from .base_coder import Coder
+from .help_prompts import HelpPrompts
+
+
+class HelpCoder(Coder):  # coder for the "help" edit format; it never produces file edits
+    edit_format = "help"  # the value Coder.create() compares against to pick this class
+
+    def __init__(self, *args, **kwargs):
+        self.gpt_prompts = HelpPrompts()  # assigned before the base initializer runs
+        super().__init__(*args, **kwargs)
+
+    def get_edits(self, mode="update"):  # always returns an empty list; `mode` is unused
+        return []
+
+    def apply_edits(self, edits):  # no-op; `edits` is ignored
+        pass
diff --git a/aider/coders/help_prompts.py b/aider/coders/help_prompts.py
new file mode 100644
index 000000000..205b1ccd9
--- /dev/null
+++ b/aider/coders/help_prompts.py
@@ -0,0 +1,31 @@
+# flake8: noqa: E501
+
+from .base_prompts import CoderPrompts
+
+
+class HelpPrompts(CoderPrompts):  # prompt set installed by HelpCoder as its gpt_prompts
+    main_system = """You are an expert on the AI coding tool called Aider.
+Answer the user's questions about how to use aider.
+
+The user is currently chatting with you using aider, to write and edit code.
+
+Use the provided aider documentation *if it is relevant to the user's question*.
+
+Include a bulleted list of urls to the aider docs that might be relevant for the user to read.
+Include *bare* urls. *Do not* make [markdown links](http://...).
+For example:
+- https://aider.chat/docs/usage.html
+- https://aider.chat/docs/faq.html
+
+If you don't know the answer, say so and suggest some relevant aider doc urls.
+
+If the user asks for something that isn't possible with aider, be clear about that.
+Don't suggest a solution that isn't supported.
+
+Be helpful but concise.
+
+Unless the question indicates otherwise, assume the user wants to use aider as a CLI tool.
+"""
+
+    example_messages = []  # no example messages are supplied for help mode
+    system_reminder = ""  # no reminder text is supplied for help mode
diff --git a/aider/commands.py b/aider/commands.py
index fd129dc3c..440c0a179 100644
--- a/aider/commands.py
+++ b/aider/commands.py
@@ -7,6 +7,7 @@ from pathlib import Path
 import git
 
 from aider import models, prompts, voice
+from aider.help import Help
 from aider.llm import litellm
 from aider.scrape import Scraper
 from aider.utils import is_image_file
@@ -32,6 +33,8 @@ class Commands:
 
         self.voice_language = voice_language
 
+        self.help = None
+
     def cmd_model(self, args):
         "Switch to a new LLM"
 
@@ -622,15 +625,31 @@ class Commands:
 
     def cmd_help(self, args):
-        "Show help about all commands"
-        commands = sorted(self.get_commands())
-        for cmd in commands:
-            cmd_method_name = f"cmd_{cmd[1:]}"
-            cmd_method = getattr(self, cmd_method_name, None)
-            if cmd_method:
-                description = cmd_method.__doc__
-                self.io.tool_output(f"{cmd} {description}")
-            else:
-                self.io.tool_output(f"{cmd} No description available.")
+        "Ask questions about aider"
+
+        from aider.coders import Coder  # local import; presumably avoids a circular import
+
+        if not self.help:  # build the Help retriever lazily, on first use
+            self.help = Help()
+
+        coder = Coder.create(
+            main_model=self.coder.main_model,
+            io=self.io,
+            from_coder=self.coder,
+            edit_format="help",
+        )
+        user_msg = self.help.ask(args)  # the question followed by retrieved doc excerpts
+        user_msg += """
+# Announcement lines from when this session of aider was launched:
+
+"""
+        user_msg += "\n".join(self.coder.get_announcements()) + "\n"
+
+        assistant_msg = coder.run(user_msg)
+
+        self.coder.cur_messages += [  # record the exchange in the main coder's messages
+            dict(role="user", content=user_msg),
+            dict(role="assistant", content=assistant_msg),
+        ]
 
     def get_help_md(self):
         "Show help about all commands in markdown"
diff --git a/aider/help.py b/aider/help.py
index 07625ac71..566ce85da 100755
--- a/aider/help.py
+++ b/aider/help.py
@@ -1,19 +1,15 @@
 #!/usr/bin/env python
 
-import time
+import os
+import sys
+import warnings
 from pathlib import Path
 
-import litellm
-from dump import dump
-from llama_index.core import (
-    Document,
-    StorageContext,
-    VectorStoreIndex,
-    load_index_from_storage,
-)
-from llama_index.core.node_parser import MarkdownNodeParser
+from tqdm import tqdm
 
-litellm.suppress_debug_info = True
+from aider.dump import dump  # noqa: F401
+
+warnings.simplefilter("ignore", category=FutureWarning)
 
 
 def should_skip_dir(dirname):
@@ -35,75 +31,38 @@ def walk_subdirs_for_files(root_dir):
         yield str(path)
 
 
-def execute(question, text):
-    sys_content = """Answer questions about how to use the Aider program.
-Give answers about how to use aider to accomplish the user's questions,
-not general advice on how to use other tools or approaches.
-
-Use the provided aider documentation *if it is relevant to the user's questions*.
-
-Include a urls to the aider docs that might be relevant for the user to read
-.
-
-If you don't know the answer, say so and suggest some relevant aider doc urls.
-If the user asks how to do something that aider doesn't support, tell them that.
-
-Be helpful but concise.
-
-Unless the question indicates otherwise, assume the user wants to use
-aider as a CLI tool.
-"""
-
-    usage = Path("website/docs/usage.md").read_text()
-
-    content = f"""# Question:
-
-{question}
-
-
-# Relevant documentation:
-
-{text}
-
-#####
-
-{usage}
-"""
-
-    messages = [
-        dict(
-            role="system",
-            content=sys_content,
-        ),
-        dict(
-            role="user",
-            content=content,
-        ),
-    ]
-
-    res = litellm.completion(
-        messages=messages,
-        # model="gpt-3.5-turbo",
-        model="gpt-4o",
-    )
-
-    return res
-
-
 def fname_to_url(filepath):
+    website = "website/"
+    index = "/index.md"
+    md = ".md"
+
+    docid = ""  # returned unchanged for paths that map to no published page
     if filepath.startswith("website/_includes/"):
-        docid = ""
-    else:
-        website = "website/"
-        assert filepath.startswith(website), filepath
+        pass  # include fragments get no URL of their own
+    elif filepath.startswith(website):
         docid = filepath[len(website) :]
-        docid = "https://aider.chat/" + filepath
+
+        if docid.endswith(index):  # rewrite the prefix-stripped path, not `filepath`
+            docid = docid[: -len(index)] + "/"
+        elif docid.endswith(md):
+            docid = docid[: -len(md)] + ".html"
+
+        docid = "https://aider.chat/" + docid  # URL must not contain the "website/" prefix
 
     return docid
 
 
 def get_index():
-    dname = Path("storage")
+    from llama_index.core import (
+        Document,
+        StorageContext,
+        VectorStoreIndex,
+        load_index_from_storage,
+    )
+    from llama_index.core.node_parser import MarkdownNodeParser
+
+    dname = Path.home() / ".aider" / "help"
+
     if dname.exists():
         storage_context = StorageContext.from_defaults(
             persist_dir=dname,
@@ -113,9 +72,7 @@ def get_index():
         parser = MarkdownNodeParser()
 
         nodes = []
-        for fname in walk_subdirs_for_files("website"):
-            dump(fname)
-            # doc = FlatReader().load_data(Path(fname))
+        for fname in tqdm(list(walk_subdirs_for_files("website"))):
             fname = Path(fname)
             doc = Document(
                 text=fname.read_text(),
@@ -128,19 +85,44 @@ def get_index():
             nodes += parser.get_nodes_from_documents([doc])
 
         index = VectorStoreIndex(nodes)
+        dname.parent.mkdir(exist_ok=True)
         index.storage_context.persist(dname)
 
     return index
 
 
-when = time.time()
+class Help:  # wraps a retriever over the index returned by get_index()
+    def __init__(self):  # sets the embedding model, loads/builds the index, makes a retriever
+        from llama_index.core import Settings
+        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 
-index = get_index()
+        os.environ["TOKENIZERS_PARALLELISM"] = "true"
+        Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
 
-print("get_index", time.time() - when)
-when = time.time()
+        index = get_index()
+
+        self.retriever = index.as_retriever(similarity_top_k=20)
+
+    def ask(self, question):  # returns prompt text: the question plus the retrieved nodes
+        nodes = self.retriever.retrieve(question)
+
+        context = f"""# Question: {question}
+
+# Relevant docs:
+
+"""
+
+        for node in nodes:
+            url = node.metadata.get("url", "")
+            if url:  # a non-empty url becomes a from_url="..." attribute on the <doc> tag
+                url = f' from_url="{url}"'
+
+            context += f"<doc{url}>\n"
+            context += node.text
+            context += "\n</doc>\n\n"
+
+        return context
 
-retriever = index.as_retriever(similarity_top_k=20)
 
 #
 # question = "how can i convert a python script to js"
@@ -148,34 +130,9 @@ retriever = index.as_retriever(similarity_top_k=20)
 # question = "i am getting an error message about exhausted context window"
 # question = "The chat session is larger than the context window!"
 # question = "how do i add deepseek api key to yaml"
-question = (
-    "It would be great if I could give aider an example github PR and instruct it to do the same"
-    " exact thing for another integration."
-)
+# question = (
+#    "It would be great if I could give aider an example github PR and instruct it to do the same"
+#    " exact thing for another integration."
+# )
 
-nodes = retriever.retrieve(question)
-
-print("retrieve", time.time() - when)
-when = time.time()
-
-dump(len(nodes))
-
-context = ""
-for node in nodes:
-    fname = node.metadata["filename"]
-    url = node.metadata.get("url", "")
-    if url:
-        url = f' from_url="{url}"'
-
-    context += f"<doc{url}>\n"
-    context += node.text
-    context += "\n</doc>\n\n"
-
-# dump(context)
-
-res = execute(question, context)
-content = res.choices[0].message.content
-dump(content)
-
-print("llm", time.time() - when)
-when = time.time()
+question = " ".join(sys.argv[1:])
diff --git a/requirements.in b/requirements.in
index 72b929e84..411d46e8b 100644
--- a/requirements.in
+++ b/requirements.in
@@ -27,6 +27,8 @@ google-generativeai
 streamlit
 watchdog
 flake8
+llama-index-core
+llama-index-embeddings-huggingface
 
 # v3.3 no longer works on python 3.9
 networkx<3.3
@@ -40,4 +42,4 @@ scipy<1.14
 # GitHub Release action failing on "KeyError: 'home-page'"
 # https://github.com/pypa/twine/blob/6fbf880ee60915cf1666348c4bdd78a10415f2ac/twine/__init__.py#L40
 # Uses importlib-metadata
-importlib-metadata<8.0.0
\ No newline at end of file
+importlib-metadata<8.0.0