mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-24 22:34:59 +00:00
added HelpCoder
This commit is contained in:
parent
b3eb1dea49
commit
9f39c8db44
7 changed files with 150 additions and 119 deletions
175
aider/help.py
175
aider/help.py
|
@ -1,19 +1,15 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import time
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
import litellm
|
||||
from dump import dump
|
||||
from llama_index.core import (
|
||||
Document,
|
||||
StorageContext,
|
||||
VectorStoreIndex,
|
||||
load_index_from_storage,
|
||||
)
|
||||
from llama_index.core.node_parser import MarkdownNodeParser
|
||||
from tqdm import tqdm
|
||||
|
||||
litellm.suppress_debug_info = True
|
||||
from aider.dump import dump # noqa: F401
|
||||
|
||||
warnings.simplefilter("ignore", category=FutureWarning)
|
||||
|
||||
|
||||
def should_skip_dir(dirname):
|
||||
|
@ -35,75 +31,38 @@ def walk_subdirs_for_files(root_dir):
|
|||
yield str(path)
|
||||
|
||||
|
||||
def execute(question, text):
|
||||
sys_content = """Answer questions about how to use the Aider program.
|
||||
Give answers about how to use aider to accomplish the user's questions,
|
||||
not general advice on how to use other tools or approaches.
|
||||
|
||||
Use the provided aider documentation *if it is relevant to the user's questions*.
|
||||
|
||||
Include a urls to the aider docs that might be relevant for the user to read
|
||||
.
|
||||
|
||||
If you don't know the answer, say so and suggest some relevant aider doc urls.
|
||||
If the user asks how to do something that aider doesn't support, tell them that.
|
||||
|
||||
Be helpful but concise.
|
||||
|
||||
Unless the question indicates otherwise, assume the user wants to use
|
||||
aider as a CLI tool.
|
||||
"""
|
||||
|
||||
usage = Path("website/docs/usage.md").read_text()
|
||||
|
||||
content = f"""# Question:
|
||||
|
||||
{question}
|
||||
|
||||
|
||||
# Relevant documentation:
|
||||
|
||||
{text}
|
||||
|
||||
#####
|
||||
|
||||
{usage}
|
||||
"""
|
||||
|
||||
messages = [
|
||||
dict(
|
||||
role="system",
|
||||
content=sys_content,
|
||||
),
|
||||
dict(
|
||||
role="user",
|
||||
content=content,
|
||||
),
|
||||
]
|
||||
|
||||
res = litellm.completion(
|
||||
messages=messages,
|
||||
# model="gpt-3.5-turbo",
|
||||
model="gpt-4o",
|
||||
)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def fname_to_url(filepath):
|
||||
website = "website/"
|
||||
index = "/index.md"
|
||||
md = ".md"
|
||||
|
||||
docid = ""
|
||||
if filepath.startswith("website/_includes/"):
|
||||
docid = ""
|
||||
else:
|
||||
website = "website/"
|
||||
assert filepath.startswith(website), filepath
|
||||
pass
|
||||
elif filepath.startswith(website):
|
||||
docid = filepath[len(website) :]
|
||||
|
||||
if filepath.endswith(index):
|
||||
filepath = filepath[: -len(index)] + "/"
|
||||
elif filepath.endswith(md):
|
||||
filepath = filepath[: -len(md)] + ".html"
|
||||
|
||||
docid = "https://aider.chat/" + filepath
|
||||
|
||||
return docid
|
||||
|
||||
|
||||
def get_index():
|
||||
dname = Path("storage")
|
||||
from llama_index.core import (
|
||||
Document,
|
||||
StorageContext,
|
||||
VectorStoreIndex,
|
||||
load_index_from_storage,
|
||||
)
|
||||
from llama_index.core.node_parser import MarkdownNodeParser
|
||||
|
||||
dname = Path.home() / ".aider" / "help"
|
||||
|
||||
if dname.exists():
|
||||
storage_context = StorageContext.from_defaults(
|
||||
persist_dir=dname,
|
||||
|
@ -113,9 +72,7 @@ def get_index():
|
|||
parser = MarkdownNodeParser()
|
||||
|
||||
nodes = []
|
||||
for fname in walk_subdirs_for_files("website"):
|
||||
dump(fname)
|
||||
# doc = FlatReader().load_data(Path(fname))
|
||||
for fname in tqdm(list(walk_subdirs_for_files("website"))):
|
||||
fname = Path(fname)
|
||||
doc = Document(
|
||||
text=fname.read_text(),
|
||||
|
@ -128,19 +85,44 @@ def get_index():
|
|||
nodes += parser.get_nodes_from_documents([doc])
|
||||
|
||||
index = VectorStoreIndex(nodes)
|
||||
dname.parent.mkdir(exist_ok=True)
|
||||
index.storage_context.persist(dname)
|
||||
|
||||
return index
|
||||
|
||||
|
||||
when = time.time()
|
||||
class Help:
|
||||
def __init__(self):
|
||||
from llama_index.core import Settings
|
||||
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||
|
||||
index = get_index()
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
||||
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
||||
|
||||
print("get_index", time.time() - when)
|
||||
when = time.time()
|
||||
index = get_index()
|
||||
|
||||
self.retriever = index.as_retriever(similarity_top_k=20)
|
||||
|
||||
def ask(self, question):
|
||||
nodes = self.retriever.retrieve(question)
|
||||
|
||||
context = f"""# Question: {question}
|
||||
|
||||
# Relevant docs:
|
||||
|
||||
"""
|
||||
|
||||
for node in nodes:
|
||||
url = node.metadata.get("url", "")
|
||||
if url:
|
||||
url = f' from_url="{url}"'
|
||||
|
||||
context += f"<doc{url}>\n"
|
||||
context += node.text
|
||||
context += "\n</doc>\n\n"
|
||||
|
||||
return context
|
||||
|
||||
retriever = index.as_retriever(similarity_top_k=20)
|
||||
|
||||
#
|
||||
# question = "how can i convert a python script to js"
|
||||
|
@ -148,34 +130,9 @@ retriever = index.as_retriever(similarity_top_k=20)
|
|||
# question = "i am getting an error message about exhausted context window"
|
||||
# question = "The chat session is larger than the context window!"
|
||||
# question = "how do i add deepseek api key to yaml"
|
||||
question = (
|
||||
"It would be great if I could give aider an example github PR and instruct it to do the same"
|
||||
" exact thing for another integration."
|
||||
)
|
||||
# question = (
|
||||
# "It would be great if I could give aider an example github PR and instruct it to do the same"
|
||||
# " exact thing for another integration."
|
||||
# )
|
||||
|
||||
nodes = retriever.retrieve(question)
|
||||
|
||||
print("retrieve", time.time() - when)
|
||||
when = time.time()
|
||||
|
||||
dump(len(nodes))
|
||||
|
||||
context = ""
|
||||
for node in nodes:
|
||||
fname = node.metadata["filename"]
|
||||
url = node.metadata.get("url", "")
|
||||
if url:
|
||||
url = f' from_url="{url}"'
|
||||
|
||||
context += f"<doc{url}>\n"
|
||||
context += node.text
|
||||
context += "\n</doc>\n\n"
|
||||
|
||||
# dump(context)
|
||||
|
||||
res = execute(question, context)
|
||||
content = res.choices[0].message.content
|
||||
dump(content)
|
||||
|
||||
print("llm", time.time() - when)
|
||||
when = time.time()
|
||||
question = " ".join(sys.argv[1:])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue