From 9f39c8db44486531a438ae436eb30bc68918a460 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 4 Jul 2024 14:29:19 -0300 Subject: [PATCH] added HelpCoder --- aider/coders/__init__.py | 2 + aider/coders/base_coder.py | 3 + aider/coders/help_coder.py | 17 ++++ aider/coders/help_prompts.py | 31 +++++++ aider/commands.py | 37 ++++++-- aider/help.py | 175 +++++++++++++---------------------- requirements.in | 4 +- 7 files changed, 150 insertions(+), 119 deletions(-) create mode 100644 aider/coders/help_coder.py create mode 100644 aider/coders/help_prompts.py diff --git a/aider/coders/__init__.py b/aider/coders/__init__.py index 19d12b0d7..08126bae5 100644 --- a/aider/coders/__init__.py +++ b/aider/coders/__init__.py @@ -2,6 +2,7 @@ from .base_coder import Coder from .editblock_coder import EditBlockCoder from .editblock_fenced_coder import EditBlockFencedCoder from .editblock_func_coder import EditBlockFunctionCoder +from .help_coder import HelpCoder from .single_wholefile_func_coder import SingleWholeFileFunctionCoder from .udiff_coder import UnifiedDiffCoder from .wholefile_coder import WholeFileCoder @@ -16,4 +17,5 @@ __all__ = [ EditBlockFunctionCoder, SingleWholeFileFunctionCoder, UnifiedDiffCoder, + HelpCoder, ] diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 662bd7912..a25bdab0f 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -80,6 +80,7 @@ class Coder: from . import ( EditBlockCoder, EditBlockFencedCoder, + HelpCoder, UnifiedDiffCoder, WholeFileCoder, ) @@ -130,6 +131,8 @@ class Coder: res = WholeFileCoder(main_model, io, **kwargs) elif edit_format == "udiff": res = UnifiedDiffCoder(main_model, io, **kwargs) + elif edit_format == "help": + res = HelpCoder(main_model, io, **kwargs) else: raise ValueError(f"Unknown edit format {edit_format}") diff --git a/aider/coders/help_coder.py b/aider/coders/help_coder.py new file mode 100644 index 000000000..1225b7b97 --- /dev/null +++ b/aider/coders/help_coder.py @@ -0,0 +1,17 @@ +from ..dump import dump # noqa: F401 +from .base_coder import Coder +from .help_prompts import HelpPrompts + + +class HelpCoder(Coder): + edit_format = "help" + + def __init__(self, *args, **kwargs): + self.gpt_prompts = HelpPrompts() + super().__init__(*args, **kwargs) + + def get_edits(self, mode="update"): + return [] + + def apply_edits(self, edits): + pass diff --git a/aider/coders/help_prompts.py b/aider/coders/help_prompts.py new file mode 100644 index 000000000..205b1ccd9 --- /dev/null +++ b/aider/coders/help_prompts.py @@ -0,0 +1,31 @@ +# flake8: noqa: E501 + +from .base_prompts import CoderPrompts + + +class HelpPrompts(CoderPrompts): + main_system = """You are an expert on the AI coding tool called Aider. +Answer the user's questions about how to use aider. + +The user is currently chatting with you using aider, to write and edit code. + +Use the provided aider documentation *if it is relevant to the user's question*. + +Include a bulleted list of urls to the aider docs that might be relevant for the user to read. +Include *bare* urls. *Do not* make [markdown links](http://...). +For example: +- https://aider.chat/docs/usage.html +- https://aider.chat/docs/faq.html + +If you don't know the answer, say so and suggest some relevant aider doc urls. + +If asks for something that isn't possible with aider, be clear about that. +Don't suggest a solution that isn't supported. + +Be helpful but concise. + +Unless the question indicates otherwise, assume the user wants to use aider as a CLI tool. +""" + + example_messages = [] + system_reminder = "" diff --git a/aider/commands.py b/aider/commands.py index fd129dc3c..440c0a179 100644 --- a/aider/commands.py +++ b/aider/commands.py @@ -7,6 +7,7 @@ from pathlib import Path import git from aider import models, prompts, voice +from aider.help import Help from aider.llm import litellm from aider.scrape import Scraper from aider.utils import is_image_file @@ -32,6 +33,8 @@ class Commands: self.voice_language = voice_language + self.help = None + def cmd_model(self, args): "Switch to a new LLM" @@ -622,15 +625,31 @@ class Commands: def cmd_help(self, args): "Show help about all commands" - commands = sorted(self.get_commands()) - for cmd in commands: - cmd_method_name = f"cmd_{cmd[1:]}" - cmd_method = getattr(self, cmd_method_name, None) - if cmd_method: - description = cmd_method.__doc__ - self.io.tool_output(f"{cmd} {description}") - else: - self.io.tool_output(f"{cmd} No description available.") + + from aider.coders import Coder + + if not self.help: + self.help = Help() + + coder = Coder.create( + main_model=self.coder.main_model, + io=self.io, + from_coder=self.coder, + edit_format="help", + ) + user_msg = self.help.ask(args) + user_msg += """ +# Announcement lines from when this session of aider was launched: + +""" + user_msg += "\n".join(self.coder.get_announcements()) + "\n" + + assistant_msg = coder.run(user_msg) + + self.coder.cur_messages += [ + dict(role="user", content=user_msg), + dict(role="assistant", content=assistant_msg), + ] def get_help_md(self): "Show help about all commands in markdown" diff --git a/aider/help.py b/aider/help.py index 07625ac71..566ce85da 100755 --- a/aider/help.py +++ b/aider/help.py @@ -1,19 +1,15 @@ #!/usr/bin/env python -import time +import os +import sys +import warnings from pathlib import Path -import litellm -from dump import dump -from llama_index.core import ( - Document, - StorageContext, - VectorStoreIndex, - load_index_from_storage, -) -from llama_index.core.node_parser import MarkdownNodeParser +from tqdm import tqdm -litellm.suppress_debug_info = True +from aider.dump import dump # noqa: F401 + +warnings.simplefilter("ignore", category=FutureWarning) def should_skip_dir(dirname): @@ -35,75 +31,38 @@ def walk_subdirs_for_files(root_dir): yield str(path) -def execute(question, text): - sys_content = """Answer questions about how to use the Aider program. -Give answers about how to use aider to accomplish the user's questions, -not general advice on how to use other tools or approaches. - -Use the provided aider documentation *if it is relevant to the user's questions*. - -Include a urls to the aider docs that might be relevant for the user to read -. - -If you don't know the answer, say so and suggest some relevant aider doc urls. -If the user asks how to do something that aider doesn't support, tell them that. - -Be helpful but concise. - -Unless the question indicates otherwise, assume the user wants to use -aider as a CLI tool. -""" - - usage = Path("website/docs/usage.md").read_text() - - content = f"""# Question: - -{question} - - -# Relevant documentation: - -{text} - -##### - -{usage} -""" - - messages = [ - dict( - role="system", - content=sys_content, - ), - dict( - role="user", - content=content, - ), - ] - - res = litellm.completion( - messages=messages, - # model="gpt-3.5-turbo", - model="gpt-4o", - ) - - return res - - def fname_to_url(filepath): + website = "website/" + index = "/index.md" + md = ".md" + + docid = "" if filepath.startswith("website/_includes/"): - docid = "" - else: - website = "website/" - assert filepath.startswith(website), filepath + pass + elif filepath.startswith(website): docid = filepath[len(website) :] + + if filepath.endswith(index): + filepath = filepath[: -len(index)] + "/" + elif filepath.endswith(md): + filepath = filepath[: -len(md)] + ".html" + docid = "https://aider.chat/" + filepath return docid def get_index(): - dname = Path("storage") + from llama_index.core import ( + Document, + StorageContext, + VectorStoreIndex, + load_index_from_storage, + ) + from llama_index.core.node_parser import MarkdownNodeParser + + dname = Path.home() / ".aider" / "help" + if dname.exists(): storage_context = StorageContext.from_defaults( persist_dir=dname, @@ -113,9 +72,7 @@ def get_index(): parser = MarkdownNodeParser() nodes = [] - for fname in walk_subdirs_for_files("website"): - dump(fname) - # doc = FlatReader().load_data(Path(fname)) + for fname in tqdm(list(walk_subdirs_for_files("website"))): fname = Path(fname) doc = Document( text=fname.read_text(), @@ -128,19 +85,44 @@ def get_index(): nodes += parser.get_nodes_from_documents([doc]) index = VectorStoreIndex(nodes) + dname.parent.mkdir(exist_ok=True) index.storage_context.persist(dname) return index -when = time.time() +class Help: + def __init__(self): + from llama_index.core import Settings + from llama_index.embeddings.huggingface import HuggingFaceEmbedding -index = get_index() + os.environ["TOKENIZERS_PARALLELISM"] = "true" + Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") -print("get_index", time.time() - when) -when = time.time() + index = get_index() + + self.retriever = index.as_retriever(similarity_top_k=20) + + def ask(self, question): + nodes = self.retriever.retrieve(question) + + context = f"""# Question: {question} + +# Relevant docs: + +""" + + for node in nodes: + url = node.metadata.get("url", "") + if url: + url = f' from_url="{url}"' + + context += f"\n" + context += node.text + context += "\n\n\n" + + return context -retriever = index.as_retriever(similarity_top_k=20) # # question = "how can i convert a python script to js" @@ -148,34 +130,9 @@ retriever = index.as_retriever(similarity_top_k=20) # question = "i am getting an error message about exhausted context window" # question = "The chat session is larger than the context window!" # question = "how do i add deepseek api key to yaml" -question = ( - "It would be great if I could give aider an example github PR and instruct it to do the same" - " exact thing for another integration." -) +# question = ( +# "It would be great if I could give aider an example github PR and instruct it to do the same" +# " exact thing for another integration." +# ) -nodes = retriever.retrieve(question) - -print("retrieve", time.time() - when) -when = time.time() - -dump(len(nodes)) - -context = "" -for node in nodes: - fname = node.metadata["filename"] - url = node.metadata.get("url", "") - if url: - url = f' from_url="{url}"' - - context += f"\n" - context += node.text - context += "\n\n\n" - -# dump(context) - -res = execute(question, context) -content = res.choices[0].message.content -dump(content) - -print("llm", time.time() - when) -when = time.time() +question = " ".join(sys.argv[1:]) diff --git a/requirements.in b/requirements.in index 72b929e84..411d46e8b 100644 --- a/requirements.in +++ b/requirements.in @@ -27,6 +27,8 @@ google-generativeai streamlit watchdog flake8 +llama-index-core +llama-index-embeddings-huggingface # v3.3 no longer works on python 3.9 networkx<3.3 @@ -40,4 +42,4 @@ scipy<1.14 # GitHub Release action failing on "KeyError: 'home-page'" # https://github.com/pypa/twine/blob/6fbf880ee60915cf1666348c4bdd78a10415f2ac/twine/__init__.py#L40 # Uses importlib-metadata -importlib-metadata<8.0.0 \ No newline at end of file +importlib-metadata<8.0.0