added HelpCoder

Paul Gauthier 2024-07-04 14:29:19 -03:00
parent b3eb1dea49
commit 9f39c8db44
7 changed files with 150 additions and 119 deletions

View file

@@ -2,6 +2,7 @@ from .base_coder import Coder
 from .editblock_coder import EditBlockCoder
 from .editblock_fenced_coder import EditBlockFencedCoder
 from .editblock_func_coder import EditBlockFunctionCoder
+from .help_coder import HelpCoder
 from .single_wholefile_func_coder import SingleWholeFileFunctionCoder
 from .udiff_coder import UnifiedDiffCoder
 from .wholefile_coder import WholeFileCoder
@@ -16,4 +17,5 @@ __all__ = [
     EditBlockFunctionCoder,
     SingleWholeFileFunctionCoder,
     UnifiedDiffCoder,
+    HelpCoder,
 ]

View file

@@ -80,6 +80,7 @@ class Coder:
         from . import (
             EditBlockCoder,
             EditBlockFencedCoder,
+            HelpCoder,
             UnifiedDiffCoder,
             WholeFileCoder,
         )
@@ -130,6 +131,8 @@ class Coder:
             res = WholeFileCoder(main_model, io, **kwargs)
         elif edit_format == "udiff":
             res = UnifiedDiffCoder(main_model, io, **kwargs)
+        elif edit_format == "help":
+            res = HelpCoder(main_model, io, **kwargs)
         else:
             raise ValueError(f"Unknown edit format {edit_format}")
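For orientation, a minimal sketch (not part of this commit) of how the new dispatch branch gets exercised; the `model` and `io` names are placeholders for an already-constructed aider Model and InputOutput:

    from aider.coders import Coder, HelpCoder

    # "model" and "io" are hypothetical stand-ins for aider's Model and InputOutput.
    coder = Coder.create(main_model=model, io=io, edit_format="help")
    assert isinstance(coder, HelpCoder)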

View file

@@ -0,0 +1,17 @@
+from ..dump import dump  # noqa: F401
+from .base_coder import Coder
+from .help_prompts import HelpPrompts
+
+
+class HelpCoder(Coder):
+    edit_format = "help"
+
+    def __init__(self, *args, **kwargs):
+        self.gpt_prompts = HelpPrompts()
+        super().__init__(*args, **kwargs)
+
+    def get_edits(self, mode="update"):
+        return []
+
+    def apply_edits(self, edits):
+        pass
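The effect, sketched (not from the diff): HelpCoder inherits all of Coder's chat machinery but stubs out the editing hooks, so a help reply is displayed without ever being parsed or applied as file edits. The constructor arguments below are placeholders:

    # Hypothetical illustration of the no-op editing contract.
    help_coder = HelpCoder(main_model, io)  # placeholder args
    assert help_coder.edit_format == "help"
    assert help_coder.get_edits() == []     # nothing is ever parsed as an edit
    help_coder.apply_edits([])              # applying edits is a no-op by design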

View file

@@ -0,0 +1,31 @@
+# flake8: noqa: E501
+
+from .base_prompts import CoderPrompts
+
+
+class HelpPrompts(CoderPrompts):
+    main_system = """You are an expert on the AI coding tool called Aider.
+Answer the user's questions about how to use aider.
+
+The user is currently chatting with you using aider, to write and edit code.
+
+Use the provided aider documentation *if it is relevant to the user's question*.
+
+Include a bulleted list of urls to the aider docs that might be relevant for the user to read.
+Include *bare* urls. *Do not* make [markdown links](http://...).
+For example:
+- https://aider.chat/docs/usage.html
+- https://aider.chat/docs/faq.html
+
+If you don't know the answer, say so and suggest some relevant aider doc urls.
+
+If the user asks for something that isn't possible with aider, be clear about that.
+Don't suggest a solution that isn't supported.
+
+Be helpful but concise.
+
+Unless the question indicates otherwise, assume the user wants to use aider as a CLI tool.
+"""
+
+    example_messages = []
+    system_reminder = ""

View file

@@ -7,6 +7,7 @@ from pathlib import Path
 import git
 
 from aider import models, prompts, voice
+from aider.help import Help
 from aider.llm import litellm
 from aider.scrape import Scraper
 from aider.utils import is_image_file
@@ -32,6 +33,8 @@ class Commands:
         self.voice_language = voice_language
 
+        self.help = None
+
     def cmd_model(self, args):
         "Switch to a new LLM"
@@ -622,15 +625,31 @@ class Commands:
     def cmd_help(self, args):
         "Show help about all commands"
-        commands = sorted(self.get_commands())
-        for cmd in commands:
-            cmd_method_name = f"cmd_{cmd[1:]}"
-            cmd_method = getattr(self, cmd_method_name, None)
-            if cmd_method:
-                description = cmd_method.__doc__
-                self.io.tool_output(f"{cmd} {description}")
-            else:
-                self.io.tool_output(f"{cmd} No description available.")
+        from aider.coders import Coder
+
+        if not self.help:
+            self.help = Help()
+
+        coder = Coder.create(
+            main_model=self.coder.main_model,
+            io=self.io,
+            from_coder=self.coder,
+            edit_format="help",
+        )
+        user_msg = self.help.ask(args)
+        user_msg += """
+# Announcement lines from when this session of aider was launched:
+"""
+        user_msg += "\n".join(self.coder.get_announcements()) + "\n"
+
+        assistant_msg = coder.run(user_msg)
+
+        self.coder.cur_messages += [
+            dict(role="user", content=user_msg),
+            dict(role="assistant", content=assistant_msg),
+        ]
 
     def get_help_md(self):
         "Show help about all commands in markdown"

View file

@@ -1,19 +1,15 @@
 #!/usr/bin/env python
 
-import time
-import warnings
+import os
+import sys
 
 from pathlib import Path
 
-import litellm
-from dump import dump
-from llama_index.core import (
-    Document,
-    StorageContext,
-    VectorStoreIndex,
-    load_index_from_storage,
-)
-from llama_index.core.node_parser import MarkdownNodeParser
-
-litellm.suppress_debug_info = True
-warnings.simplefilter("ignore", category=FutureWarning)
+from tqdm import tqdm
+
+from aider.dump import dump  # noqa: F401
 
 
 def should_skip_dir(dirname):
@@ -35,75 +31,38 @@ def walk_subdirs_for_files(root_dir):
         yield str(path)
 
 
-def execute(question, text):
-    sys_content = """Answer questions about how to use the Aider program.
-Give answers about how to use aider to accomplish the user's questions,
-not general advice on how to use other tools or approaches.
-
-Use the provided aider documentation *if it is relevant to the user's questions*.
-
-Include urls to the aider docs that might be relevant for the user to read.
-
-If you don't know the answer, say so and suggest some relevant aider doc urls.
-
-If the user asks how to do something that aider doesn't support, tell them that.
-
-Be helpful but concise.
-
-Unless the question indicates otherwise, assume the user wants to use
-aider as a CLI tool.
-"""
-
-    usage = Path("website/docs/usage.md").read_text()
-
-    content = f"""# Question:
-
-{question}
-
-# Relevant documentation:
-
-{text}
-
-#####
-
-{usage}
-"""
-
-    messages = [
-        dict(
-            role="system",
-            content=sys_content,
-        ),
-        dict(
-            role="user",
-            content=content,
-        ),
-    ]
-
-    res = litellm.completion(
-        messages=messages,
-        # model="gpt-3.5-turbo",
-        model="gpt-4o",
-    )
-
-    return res
-
-
 def fname_to_url(filepath):
+    website = "website/"
+    index = "/index.md"
+    md = ".md"
+
+    docid = ""
     if filepath.startswith("website/_includes/"):
-        docid = ""
-    else:
-        website = "website/"
-        assert filepath.startswith(website), filepath
+        pass
+    elif filepath.startswith(website):
         docid = filepath[len(website) :]
 
+        if filepath.endswith(index):
+            filepath = filepath[: -len(index)] + "/"
+        elif filepath.endswith(md):
+            filepath = filepath[: -len(md)] + ".html"
+
         docid = "https://aider.chat/" + filepath
 
     return docid
 
 
 def get_index():
-    dname = Path("storage")
+    from llama_index.core import (
+        Document,
+        StorageContext,
+        VectorStoreIndex,
+        load_index_from_storage,
+    )
+    from llama_index.core.node_parser import MarkdownNodeParser
+
+    dname = Path.home() / ".aider" / "help"
 
     if dname.exists():
         storage_context = StorageContext.from_defaults(
             persist_dir=dname,
@@ -113,9 +72,7 @@ def get_index():
         parser = MarkdownNodeParser()
 
         nodes = []
-        for fname in walk_subdirs_for_files("website"):
-            dump(fname)
-            # doc = FlatReader().load_data(Path(fname))
+        for fname in tqdm(list(walk_subdirs_for_files("website"))):
            fname = Path(fname)
            doc = Document(
                text=fname.read_text(),
@@ -128,19 +85,44 @@ def get_index():
            nodes += parser.get_nodes_from_documents([doc])
 
         index = VectorStoreIndex(nodes)
+        dname.parent.mkdir(exist_ok=True)
         index.storage_context.persist(dname)
 
     return index
 
 
-when = time.time()
-
-index = get_index()
-
-print("get_index", time.time() - when)
-when = time.time()
-
-retriever = index.as_retriever(similarity_top_k=20)
+class Help:
+    def __init__(self):
+        from llama_index.core import Settings
+        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+        os.environ["TOKENIZERS_PARALLELISM"] = "true"
+        Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+        index = get_index()
+
+        self.retriever = index.as_retriever(similarity_top_k=20)
+
+    def ask(self, question):
+        nodes = self.retriever.retrieve(question)
+
+        context = f"""# Question: {question}
+
+# Relevant docs:
+
+"""
+        for node in nodes:
+            url = node.metadata.get("url", "")
+            if url:
+                url = f' from_url="{url}"'
+            context += f"<doc{url}>\n"
+            context += node.text
+            context += "\n</doc>\n\n"
+
+        return context
 
 #
 # question = "how can i convert a python script to js"
@@ -148,34 +130,9 @@ retriever = index.as_retriever(similarity_top_k=20)
 # question = "i am getting an error message about exhausted context window"
 # question = "The chat session is larger than the context window!"
 # question = "how do i add deepseek api key to yaml"
-question = (
-    "It would be great if I could give aider an example github PR and instruct it to do the same"
-    " exact thing for another integration."
-)
+# question = (
+#     "It would be great if I could give aider an example github PR and instruct it to do the same"
+#     " exact thing for another integration."
+# )
 
-nodes = retriever.retrieve(question)
-
-print("retrieve", time.time() - when)
-when = time.time()
-
-dump(len(nodes))
-
-context = ""
-for node in nodes:
-    fname = node.metadata["filename"]
-    url = node.metadata.get("url", "")
-    if url:
-        url = f' from_url="{url}"'
-    context += f"<doc{url}>\n"
-    context += node.text
-    context += "\n</doc>\n\n"
-
-# dump(context)
-
-res = execute(question, context)
-content = res.choices[0].message.content
-dump(content)
-
-print("llm", time.time() - when)
-when = time.time()
+question = " ".join(sys.argv[1:])
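To make the retrieval side concrete, a sketch under the assumptions visible in the diff: the index is embedded with BAAI/bge-small-en-v1.5, persisted under ~/.aider/help, and queried for the top 20 nodes; note that ask() returns a prompt-ready string, not an LLM answer:

    from aider.help import Help

    help = Help()  # loads the persisted index from ~/.aider/help, or builds it first
    context = help.ask("how do i run aider against a local model?")  # example question
    # context is a "# Question: ..." header followed by <doc from_url="...">...</doc>
    # blocks; the caller (e.g. cmd_help) still has to send it to an LLM.
    print(context[:500])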

View file

@@ -27,6 +27,8 @@ google-generativeai
 streamlit
 watchdog
 flake8
+llama-index-core
+llama-index-embeddings-huggingface
 
 # v3.3 no longer works on python 3.9
 networkx<3.3
@@ -40,4 +42,4 @@ scipy<1.14
 # GitHub Release action failing on "KeyError: 'home-page'"
 # https://github.com/pypa/twine/blob/6fbf880ee60915cf1666348c4bdd78a10415f2ac/twine/__init__.py#L40
 # Uses importlib-metadata
 importlib-metadata<8.0.0