added HelpCoder

Paul Gauthier 2024-07-04 14:29:19 -03:00
parent b3eb1dea49
commit 9f39c8db44
7 changed files with 150 additions and 119 deletions

aider/coders/__init__.py

@@ -2,6 +2,7 @@ from .base_coder import Coder
from .editblock_coder import EditBlockCoder
from .editblock_fenced_coder import EditBlockFencedCoder
from .editblock_func_coder import EditBlockFunctionCoder
+from .help_coder import HelpCoder
from .single_wholefile_func_coder import SingleWholeFileFunctionCoder
from .udiff_coder import UnifiedDiffCoder
from .wholefile_coder import WholeFileCoder
@@ -16,4 +17,5 @@ __all__ = [
EditBlockFunctionCoder,
SingleWholeFileFunctionCoder,
UnifiedDiffCoder,
+    HelpCoder,
]

aider/coders/base_coder.py

@@ -80,6 +80,7 @@ class Coder:
from . import (
EditBlockCoder,
EditBlockFencedCoder,
+            HelpCoder,
UnifiedDiffCoder,
WholeFileCoder,
)
@@ -130,6 +131,8 @@ class Coder:
res = WholeFileCoder(main_model, io, **kwargs)
elif edit_format == "udiff":
res = UnifiedDiffCoder(main_model, io, **kwargs)
+        elif edit_format == "help":
+            res = HelpCoder(main_model, io, **kwargs)
else:
raise ValueError(f"Unknown edit format {edit_format}")
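With this dispatch in place, requesting the `help` edit format returns a `HelpCoder`. A minimal sketch of exercising the factory (the model name and IO setup are illustrative, not part of this commit):

```python
# Minimal sketch: selecting the new coder through the factory.
# Model("gpt-4o") and InputOutput() are illustrative setup values,
# assuming aider's usual Model and InputOutput helpers.
from aider.coders import Coder
from aider.io import InputOutput
from aider.models import Model

coder = Coder.create(
    main_model=Model("gpt-4o"),
    io=InputOutput(),
    edit_format="help",
)
assert type(coder).__name__ == "HelpCoder"
```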

aider/coders/help_coder.py

@@ -0,0 +1,17 @@
+from ..dump import dump  # noqa: F401
+from .base_coder import Coder
+from .help_prompts import HelpPrompts
+
+
+class HelpCoder(Coder):
+    edit_format = "help"
+
+    def __init__(self, *args, **kwargs):
+        self.gpt_prompts = HelpPrompts()
+        super().__init__(*args, **kwargs)
+
+    def get_edits(self, mode="update"):
+        return []
+
+    def apply_edits(self, edits):
+        pass
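`get_edits` and `apply_edits` are the two hooks the base `Coder` invokes after each LLM reply; returning an empty list and applying nothing turns this coder into a pure question-answerer that never touches the repo. A hypothetical subclass following the same pattern:

```python
# Hypothetical read-only coder using the same no-op pattern
# (illustrative; not part of this commit).
from aider.coders.base_coder import Coder


class ReadOnlyCoder(Coder):
    edit_format = "read-only"  # hypothetical edit format name

    def get_edits(self, mode="update"):
        # Parse no edit instructions out of the LLM reply.
        return []

    def apply_edits(self, edits):
        # Nothing to apply, so no files are ever changed.
        pass
```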

aider/coders/help_prompts.py

@@ -0,0 +1,31 @@
+# flake8: noqa: E501
+
+from .base_prompts import CoderPrompts
+
+
+class HelpPrompts(CoderPrompts):
+    main_system = """You are an expert on the AI coding tool called Aider.
+Answer the user's questions about how to use aider.
+
+The user is currently chatting with you using aider, to write and edit code.
+
+Use the provided aider documentation *if it is relevant to the user's question*.
+
+Include a bulleted list of urls to the aider docs that might be relevant for the user to read.
+Include *bare* urls. *Do not* make [markdown links](http://...).
+For example:
+- https://aider.chat/docs/usage.html
+- https://aider.chat/docs/faq.html
+
+If you don't know the answer, say so and suggest some relevant aider doc urls.
+
+If the user asks for something that isn't possible with aider, be clear about that.
+Don't suggest a solution that isn't supported.
+
+Be helpful but concise.
+
+Unless the question indicates otherwise, assume the user wants to use aider as a CLI tool.
+"""
+
+    example_messages = []
+    system_reminder = ""
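`HelpCoder.__init__` assigns `self.gpt_prompts = HelpPrompts()` before calling the base class, so the base `Coder` picks up this `main_system` text when assembling the chat. A rough sketch of that assembly (simplified; the real message formatting lives in `base_coder.py`):

```python
# Simplified sketch of how the prompt object feeds the chat
# (the real assembly in base_coder.py adds more context).
from aider.coders.help_prompts import HelpPrompts

prompts = HelpPrompts()
messages = [
    dict(role="system", content=prompts.main_system),
    dict(role="user", content="how do I add files to the chat?"),
]
print(messages[0]["content"].splitlines()[0])
```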

aider/commands.py

@@ -7,6 +7,7 @@ from pathlib import Path
import git
from aider import models, prompts, voice
+from aider.help import Help
from aider.llm import litellm
from aider.scrape import Scraper
from aider.utils import is_image_file
@@ -32,6 +33,8 @@ class Commands:
self.voice_language = voice_language
+        self.help = None
def cmd_model(self, args):
"Switch to a new LLM"
@@ -622,15 +625,31 @@ class Commands:
def cmd_help(self, args):
"Show help about all commands"
-        commands = sorted(self.get_commands())
-        for cmd in commands:
-            cmd_method_name = f"cmd_{cmd[1:]}"
-            cmd_method = getattr(self, cmd_method_name, None)
-            if cmd_method:
-                description = cmd_method.__doc__
-                self.io.tool_output(f"{cmd} {description}")
-            else:
-                self.io.tool_output(f"{cmd} No description available.")
+        from aider.coders import Coder
+
+        if not self.help:
+            self.help = Help()
+
+        coder = Coder.create(
+            main_model=self.coder.main_model,
+            io=self.io,
+            from_coder=self.coder,
+            edit_format="help",
+        )
+        user_msg = self.help.ask(args)
+        user_msg += """
+# Announcement lines from when this session of aider was launched:
+
+"""
+        user_msg += "\n".join(self.coder.get_announcements()) + "\n"
+
+        assistant_msg = coder.run(user_msg)
+
+        self.coder.cur_messages += [
+            dict(role="user", content=user_msg),
+            dict(role="assistant", content=assistant_msg),
+        ]
def get_help_md(self):
"Show help about all commands in markdown"

aider/help.py

@@ -1,19 +1,15 @@
#!/usr/bin/env python
-import time
import os
import sys
import warnings
from pathlib import Path
-import litellm
-from dump import dump
-from llama_index.core import (
-    Document,
-    StorageContext,
-    VectorStoreIndex,
-    load_index_from_storage,
-)
-from llama_index.core.node_parser import MarkdownNodeParser
+from tqdm import tqdm
-litellm.suppress_debug_info = True
+from aider.dump import dump  # noqa: F401
warnings.simplefilter("ignore", category=FutureWarning)
def should_skip_dir(dirname):
@@ -35,75 +31,38 @@ def walk_subdirs_for_files(root_dir):
yield str(path)
-def execute(question, text):
-    sys_content = """Answer questions about how to use the Aider program.
-Give answers about how to use aider to accomplish the user's questions,
-not general advice on how to use other tools or approaches.
-Use the provided aider documentation *if it is relevant to the user's questions*.
-Include urls to the aider docs that might be relevant for the user to read.
-If you don't know the answer, say so and suggest some relevant aider doc urls.
-If the user asks how to do something that aider doesn't support, tell them that.
-Be helpful but concise.
-Unless the question indicates otherwise, assume the user wants to use
-aider as a CLI tool.
-"""
-    usage = Path("website/docs/usage.md").read_text()
-    content = f"""# Question:
-{question}
-# Relevant documentation:
-{text}
-#####
-{usage}
-"""
-    messages = [
-        dict(
-            role="system",
-            content=sys_content,
-        ),
-        dict(
-            role="user",
-            content=content,
-        ),
-    ]
-    res = litellm.completion(
-        messages=messages,
-        # model="gpt-3.5-turbo",
-        model="gpt-4o",
-    )
-    return res
def fname_to_url(filepath):
    website = "website/"
    index = "/index.md"
    md = ".md"
    docid = ""
    if filepath.startswith("website/_includes/"):
-        docid = ""
-    else:
-        website = "website/"
-        assert filepath.startswith(website), filepath
+        pass
+    elif filepath.startswith(website):
        docid = filepath[len(website) :]
        if filepath.endswith(index):
            filepath = filepath[: -len(index)] + "/"
        elif filepath.endswith(md):
            filepath = filepath[: -len(md)] + ".html"
        docid = "https://aider.chat/" + filepath
    return docid
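The helper maps a markdown source path under `website/` to the page's published URL, with `_includes` fragments mapping to an empty string since they have no standalone page. Roughly (hypothetical spot-checks of the intended mapping, not captured output):

```python
# Hypothetical spot-checks of the intended path-to-URL mapping.
from aider.help import fname_to_url

print(fname_to_url("website/_includes/help.md"))  # "" (no standalone page)
print(fname_to_url("website/docs/usage.md"))      # intended: https://aider.chat/docs/usage.html
print(fname_to_url("website/docs/index.md"))      # intended: https://aider.chat/docs/
```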
def get_index():
-    dname = Path("storage")
+    from llama_index.core import (
+        Document,
+        StorageContext,
+        VectorStoreIndex,
+        load_index_from_storage,
+    )
+    from llama_index.core.node_parser import MarkdownNodeParser
+
+    dname = Path.home() / ".aider" / "help"
    if dname.exists():
        storage_context = StorageContext.from_defaults(
            persist_dir=dname,
@@ -113,9 +72,7 @@ def get_index():
parser = MarkdownNodeParser()
nodes = []
-        for fname in walk_subdirs_for_files("website"):
-            dump(fname)
-            # doc = FlatReader().load_data(Path(fname))
+        for fname in tqdm(list(walk_subdirs_for_files("website"))):
fname = Path(fname)
doc = Document(
text=fname.read_text(),
@@ -128,19 +85,44 @@ def get_index():
nodes += parser.get_nodes_from_documents([doc])
index = VectorStoreIndex(nodes)
+        dname.parent.mkdir(exist_ok=True)
index.storage_context.persist(dname)
return index
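`get_index` persists the vector index under `~/.aider/help` on the first run and reloads it from disk afterwards, so only the first `/help` pays the indexing cost. A usage sketch (assumes the embedding model has been configured first, as `Help.__init__` below does):

```python
# Usage sketch: build-or-load the persisted index, then retrieve.
# Assumes Settings.embed_model was configured first, as Help.__init__ does.
from aider.help import get_index

index = get_index()                                  # builds on first call, loads after
retriever = index.as_retriever(similarity_top_k=20)
nodes = retriever.retrieve("how do I use aider with docker?")
print(len(nodes))
```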
-when = time.time()
+class Help:
+    def __init__(self):
+        from llama_index.core import Settings
+        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-index = get_index()
+        os.environ["TOKENIZERS_PARALLELISM"] = "true"
+        Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
-print("get_index", time.time() - when)
-when = time.time()
+        index = get_index()
+        self.retriever = index.as_retriever(similarity_top_k=20)
+
+    def ask(self, question):
+        nodes = self.retriever.retrieve(question)
+
+        context = f"""# Question: {question}
+
+# Relevant docs:
+
+"""
+        for node in nodes:
+            url = node.metadata.get("url", "")
+            if url:
+                url = f' from_url="{url}"'
+            context += f"<doc{url}>\n"
+            context += node.text
+            context += "\n</doc>\n\n"
+
+        return context
-retriever = index.as_retriever(similarity_top_k=20)
#
# question = "how can i convert a python script to js"
@@ -148,34 +130,9 @@ retriever = index.as_retriever(similarity_top_k=20)
# question = "i am getting an error message about exhausted context window"
# question = "The chat session is larger than the context window!"
# question = "how do i add deepseek api key to yaml"
-question = (
-    "It would be great if I could give aider an example github PR and instruct it to do the same"
-    " exact thing for another integration."
-)
+# question = (
+#     "It would be great if I could give aider an example github PR and instruct it to do the same"
+#     " exact thing for another integration."
+# )
-nodes = retriever.retrieve(question)
-print("retrieve", time.time() - when)
-when = time.time()
-dump(len(nodes))
-context = ""
-for node in nodes:
-    fname = node.metadata["filename"]
-    url = node.metadata.get("url", "")
-    if url:
-        url = f' from_url="{url}"'
-    context += f"<doc{url}>\n"
-    context += node.text
-    context += "\n</doc>\n\n"
-# dump(context)
-res = execute(question, context)
-content = res.choices[0].message.content
-dump(content)
-print("llm", time.time() - when)
-when = time.time()
+question = " ".join(sys.argv[1:])
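The module-level test scaffolding is gone; `Help` is now the library entry point, and `ask` returns a prompt fragment (the question plus retrieved chunks wrapped in `<doc>` tags) rather than an LLM answer. A minimal usage sketch (question text is illustrative):

```python
# Minimal usage sketch of the new Help class (question is illustrative).
from aider.help import Help

helper = Help()                                    # configures embeddings, loads the index
msg = helper.ask("how do i run aider with docker?")
print(msg[:300])  # "# Question: ..." followed by <doc from_url="..."> chunks
```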

requirements.in

@@ -27,6 +27,8 @@ google-generativeai
streamlit
watchdog
flake8
+llama-index-core
+llama-index-embeddings-huggingface
# v3.3 no longer works on python 3.9
networkx<3.3
@@ -40,4 +42,4 @@ scipy<1.14
# GitHub Release action failing on "KeyError: 'home-page'"
# https://github.com/pypa/twine/blob/6fbf880ee60915cf1666348c4bdd78a10415f2ac/twine/__init__.py#L40
# Uses importlib-metadata
-importlib-metadata<8.0.0
\ No newline at end of file
+importlib-metadata<8.0.0