added HelpCoder

Paul Gauthier 2024-07-04 14:29:19 -03:00
parent b3eb1dea49
commit 9f39c8db44
7 changed files with 150 additions and 119 deletions

View file

@@ -2,6 +2,7 @@ from .base_coder import Coder
 from .editblock_coder import EditBlockCoder
 from .editblock_fenced_coder import EditBlockFencedCoder
 from .editblock_func_coder import EditBlockFunctionCoder
+from .help_coder import HelpCoder
 from .single_wholefile_func_coder import SingleWholeFileFunctionCoder
 from .udiff_coder import UnifiedDiffCoder
 from .wholefile_coder import WholeFileCoder
@@ -16,4 +17,5 @@ __all__ = [
     EditBlockFunctionCoder,
     SingleWholeFileFunctionCoder,
     UnifiedDiffCoder,
+    HelpCoder,
 ]

View file

@@ -80,6 +80,7 @@ class Coder:
         from . import (
             EditBlockCoder,
             EditBlockFencedCoder,
+            HelpCoder,
             UnifiedDiffCoder,
             WholeFileCoder,
         )
@@ -130,6 +131,8 @@ class Coder:
             res = WholeFileCoder(main_model, io, **kwargs)
         elif edit_format == "udiff":
             res = UnifiedDiffCoder(main_model, io, **kwargs)
+        elif edit_format == "help":
+            res = HelpCoder(main_model, io, **kwargs)
         else:
             raise ValueError(f"Unknown edit format {edit_format}")
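For orientation, a minimal sketch (not part of this commit) of how the new dispatch branch gets exercised; the `model` and `io` names are placeholders for an already-constructed aider Model and InputOutput:

    from aider.coders import Coder, HelpCoder

    # "model" and "io" are hypothetical stand-ins for aider's Model and InputOutput.
    coder = Coder.create(main_model=model, io=io, edit_format="help")
    assert isinstance(coder, HelpCoder)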

View file

@@ -0,0 +1,17 @@
+from ..dump import dump  # noqa: F401
+from .base_coder import Coder
+from .help_prompts import HelpPrompts
+
+
+class HelpCoder(Coder):
+    edit_format = "help"
+
+    def __init__(self, *args, **kwargs):
+        self.gpt_prompts = HelpPrompts()
+        super().__init__(*args, **kwargs)
+
+    def get_edits(self, mode="update"):
+        return []
+
+    def apply_edits(self, edits):
+        pass
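The effect, sketched (not from the diff): HelpCoder inherits all of Coder's chat machinery but stubs out the editing hooks, so a help reply is displayed without ever being parsed or applied as file edits. The constructor arguments below are placeholders:

    # Hypothetical illustration of the no-op editing contract.
    help_coder = HelpCoder(main_model, io)  # placeholder args
    assert help_coder.edit_format == "help"
    assert help_coder.get_edits() == []     # nothing is ever parsed as an edit
    help_coder.apply_edits([])              # applying edits is a no-op by design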

View file

@@ -0,0 +1,31 @@
+# flake8: noqa: E501
+
+from .base_prompts import CoderPrompts
+
+
+class HelpPrompts(CoderPrompts):
+    main_system = """You are an expert on the AI coding tool called Aider.
+Answer the user's questions about how to use aider.
+
+The user is currently chatting with you using aider, to write and edit code.
+
+Use the provided aider documentation *if it is relevant to the user's question*.
+
+Include a bulleted list of urls to the aider docs that might be relevant for the user to read.
+Include *bare* urls. *Do not* make [markdown links](http://...).
+For example:
+- https://aider.chat/docs/usage.html
+- https://aider.chat/docs/faq.html
+
+If you don't know the answer, say so and suggest some relevant aider doc urls.
+
+If the user asks for something that isn't possible with aider, be clear about that.
+Don't suggest a solution that isn't supported.
+
+Be helpful but concise.
+
+Unless the question indicates otherwise, assume the user wants to use aider as a CLI tool.
+"""
+
+    example_messages = []
+    system_reminder = ""

View file

@@ -7,6 +7,7 @@ from pathlib import Path
 import git
 
 from aider import models, prompts, voice
+from aider.help import Help
 from aider.llm import litellm
 from aider.scrape import Scraper
 from aider.utils import is_image_file
@@ -32,6 +33,8 @@ class Commands:
         self.voice_language = voice_language
 
+        self.help = None
+
     def cmd_model(self, args):
         "Switch to a new LLM"
@@ -622,15 +625,31 @@ class Commands:
     def cmd_help(self, args):
         "Show help about all commands"
-        commands = sorted(self.get_commands())
-        for cmd in commands:
-            cmd_method_name = f"cmd_{cmd[1:]}"
-            cmd_method = getattr(self, cmd_method_name, None)
-            if cmd_method:
-                description = cmd_method.__doc__
-                self.io.tool_output(f"{cmd} {description}")
-            else:
-                self.io.tool_output(f"{cmd} No description available.")
+        from aider.coders import Coder
+
+        if not self.help:
+            self.help = Help()
+
+        coder = Coder.create(
+            main_model=self.coder.main_model,
+            io=self.io,
+            from_coder=self.coder,
+            edit_format="help",
+        )
+        user_msg = self.help.ask(args)
+        user_msg += """
+# Announcement lines from when this session of aider was launched:
+"""
+        user_msg += "\n".join(self.coder.get_announcements()) + "\n"
+
+        assistant_msg = coder.run(user_msg)
+
+        self.coder.cur_messages += [
+            dict(role="user", content=user_msg),
+            dict(role="assistant", content=assistant_msg),
+        ]
 
     def get_help_md(self):
         "Show help about all commands in markdown"

View file

@@ -1,19 +1,15 @@
 #!/usr/bin/env python
 
-import time
-import warnings
+import os
+import sys
 
 from pathlib import Path
 
-import litellm
-from dump import dump
-from llama_index.core import (
-    Document,
-    StorageContext,
-    VectorStoreIndex,
-    load_index_from_storage,
-)
-from llama_index.core.node_parser import MarkdownNodeParser
-
-litellm.suppress_debug_info = True
-warnings.simplefilter("ignore", category=FutureWarning)
+from tqdm import tqdm
+
+from aider.dump import dump  # noqa: F401
 
 
 def should_skip_dir(dirname):
@@ -35,75 +31,38 @@ def walk_subdirs_for_files(root_dir):
         yield str(path)
 
 
-def execute(question, text):
-    sys_content = """Answer questions about how to use the Aider program.
-Give answers about how to use aider to accomplish the user's questions,
-not general advice on how to use other tools or approaches.
-
-Use the provided aider documentation *if it is relevant to the user's questions*.
-
-Include urls to the aider docs that might be relevant for the user to read.
-
-If you don't know the answer, say so and suggest some relevant aider doc urls.
-
-If the user asks how to do something that aider doesn't support, tell them that.
-
-Be helpful but concise.
-
-Unless the question indicates otherwise, assume the user wants to use
-aider as a CLI tool.
-"""
-
-    usage = Path("website/docs/usage.md").read_text()
-
-    content = f"""# Question:
-
-{question}
-
-# Relevant documentation:
-
-{text}
-
-#####
-
-{usage}
-"""
-
-    messages = [
-        dict(
-            role="system",
-            content=sys_content,
-        ),
-        dict(
-            role="user",
-            content=content,
-        ),
-    ]
-
-    res = litellm.completion(
-        messages=messages,
-        # model="gpt-3.5-turbo",
-        model="gpt-4o",
-    )
-
-    return res
-
-
 def fname_to_url(filepath):
+    website = "website/"
+    index = "/index.md"
+    md = ".md"
+
+    docid = ""
     if filepath.startswith("website/_includes/"):
-        docid = ""
-    else:
-        website = "website/"
-        assert filepath.startswith(website), filepath
+        pass
+    elif filepath.startswith(website):
         docid = filepath[len(website) :]
 
+        if filepath.endswith(index):
+            filepath = filepath[: -len(index)] + "/"
+        elif filepath.endswith(md):
+            filepath = filepath[: -len(md)] + ".html"
+
         docid = "https://aider.chat/" + filepath
 
     return docid
 
 
 def get_index():
-    dname = Path("storage")
+    from llama_index.core import (
+        Document,
+        StorageContext,
+        VectorStoreIndex,
+        load_index_from_storage,
+    )
+    from llama_index.core.node_parser import MarkdownNodeParser
+
+    dname = Path.home() / ".aider" / "help"
 
     if dname.exists():
         storage_context = StorageContext.from_defaults(
             persist_dir=dname,
@@ -113,9 +72,7 @@ def get_index():
         parser = MarkdownNodeParser()
 
         nodes = []
-        for fname in walk_subdirs_for_files("website"):
-            dump(fname)
-            # doc = FlatReader().load_data(Path(fname))
+        for fname in tqdm(list(walk_subdirs_for_files("website"))):
            fname = Path(fname)
            doc = Document(
                text=fname.read_text(),
@@ -128,19 +85,44 @@ def get_index():
            nodes += parser.get_nodes_from_documents([doc])
 
         index = VectorStoreIndex(nodes)
+        dname.parent.mkdir(exist_ok=True)
         index.storage_context.persist(dname)
 
     return index
 
 
-when = time.time()
-
-index = get_index()
-
-print("get_index", time.time() - when)
-when = time.time()
-
-retriever = index.as_retriever(similarity_top_k=20)
+class Help:
+    def __init__(self):
+        from llama_index.core import Settings
+        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+        os.environ["TOKENIZERS_PARALLELISM"] = "true"
+        Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+
+        index = get_index()
+
+        self.retriever = index.as_retriever(similarity_top_k=20)
+
+    def ask(self, question):
+        nodes = self.retriever.retrieve(question)
+
+        context = f"""# Question: {question}
+
+# Relevant docs:
+
+"""
+        for node in nodes:
+            url = node.metadata.get("url", "")
+            if url:
+                url = f' from_url="{url}"'
+            context += f"<doc{url}>\n"
+            context += node.text
+            context += "\n</doc>\n\n"
+
+        return context
 
 #
 # question = "how can i convert a python script to js"
@@ -148,34 +130,9 @@ retriever = index.as_retriever(similarity_top_k=20)
 # question = "i am getting an error message about exhausted context window"
 # question = "The chat session is larger than the context window!"
 # question = "how do i add deepseek api key to yaml"
-question = (
-    "It would be great if I could give aider an example github PR and instruct it to do the same"
-    " exact thing for another integration."
-)
+# question = (
+#     "It would be great if I could give aider an example github PR and instruct it to do the same"
+#     " exact thing for another integration."
+# )
 
-nodes = retriever.retrieve(question)
-
-print("retrieve", time.time() - when)
-when = time.time()
-
-dump(len(nodes))
-
-context = ""
-for node in nodes:
-    fname = node.metadata["filename"]
-    url = node.metadata.get("url", "")
-    if url:
-        url = f' from_url="{url}"'
-    context += f"<doc{url}>\n"
-    context += node.text
-    context += "\n</doc>\n\n"
-
-# dump(context)
-
-res = execute(question, context)
-content = res.choices[0].message.content
-dump(content)
-
-print("llm", time.time() - when)
-when = time.time()
+question = " ".join(sys.argv[1:])
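To make the retrieval side concrete, a sketch under the assumptions visible in the diff: the index is embedded with BAAI/bge-small-en-v1.5, persisted under ~/.aider/help, and queried for the top 20 nodes; note that ask() returns a prompt-ready string, not an LLM answer:

    from aider.help import Help

    help = Help()  # loads the persisted index from ~/.aider/help, or builds it first
    context = help.ask("how do i run aider against a local model?")  # example question
    # context is a "# Question: ..." header followed by <doc from_url="...">...</doc>
    # blocks; the caller (e.g. cmd_help) still has to send it to an LLM.
    print(context[:500])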

View file

@@ -27,6 +27,8 @@ google-generativeai
 streamlit
 watchdog
 flake8
+llama-index-core
+llama-index-embeddings-huggingface
 
 # v3.3 no longer works on python 3.9
 networkx<3.3
@@ -40,4 +42,4 @@ scipy<1.14
 # GitHub Release action failing on "KeyError: 'home-page'"
 # https://github.com/pypa/twine/blob/6fbf880ee60915cf1666348c4bdd78a10415f2ac/twine/__init__.py#L40
 # Uses importlib-metadata
 importlib-metadata<8.0.0