#!/usr/bin/env python
"""Provide documentation-based answers to user questions about the Aider program.

Running this script builds (or reloads) a vector index over the markdown
files under ``website/``, retrieves the chunks most similar to a question,
and asks an LLM to answer the question using those chunks as context.
"""

import json
import os
import sys
import time
from collections import defaultdict
from pathlib import Path

import litellm

litellm.suppress_debug_info = True

import faiss
from dump import dump
from llama_index.core import (
    Document,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.readers.file import FlatReader


def should_skip_dir(dirname):
    """Return True if a path component names a directory to leave out of the index.

    Components starting with ``OLD`` or ``tmp``, and the exact names
    ``examples`` and ``_posts``, are skipped.
    """
    return dirname.startswith(("OLD", "tmp")) or dirname in ("examples", "_posts")


def walk_subdirs_for_files(root_dir):
    """Yield the path (as a string) of every ``*.md`` file under *root_dir*.

    A file is omitted when any component of its path is rejected by
    ``should_skip_dir``.
    """
    root_path = Path(root_dir)
    for path in root_path.rglob("*.md"):
        if any(should_skip_dir(part) for part in path.parts):
            continue
        yield str(path)


def execute(question, text):
    """Ask the LLM to answer *question* using *text* as documentation context.

    The contents of ``website/docs/usage.md`` (resolved relative to the
    current working directory) are appended to the user message after the
    retrieved documentation.

    Returns the response object produced by ``litellm.completion``.
    """
    sys_content = """Answer questions about how to use the Aider program.
Give answers about how to use aider to accomplish the user's questions,
not general advice on how to use other tools or approaches.

Use the provided aider documentation *if it is relevant to the user's questions*.

Include a urls to the aider docs that might be relevant for the user to read
.

If you don't know the answer, say so and suggest some relevant aider doc urls.
If the user asks how to do something that aider doesn't support, tell them that.

Be helpful but concise.

Unless the question indicates otherwise, assume the user wants to use
aider as a CLI tool.
"""

    # Read explicitly as UTF-8 so the result does not depend on the locale.
    usage = Path("website/docs/usage.md").read_text(encoding="utf-8")

    content = f"""# Question:

{question}


# Relevant documentation:

{text}

#####

{usage}
"""

    messages = [
        dict(
            role="system",
            content=sys_content,
        ),
        dict(
            role="user",
            content=content,
        ),
    ]

    res = litellm.completion(
        messages=messages,
        # model="gpt-3.5-turbo",
        model="gpt-4o",
    )

    return res


def fname_to_url(filepath):
    """Map a markdown path under ``website/`` to its ``https://aider.chat/`` URL.

    Paths under ``website/_includes/`` map to the empty string (presumably
    because include fragments have no page of their own -- confirm against
    the site layout).  Any other path must start with ``website/``; that
    prefix is stripped before the site root is prepended.

    Raises AssertionError if *filepath* does not start with ``website/``.
    """
    if filepath.startswith("website/_includes/"):
        return ""

    website = "website/"
    assert filepath.startswith(website), filepath

    # Build the URL from the path *relative to* the website root; using the
    # full filepath here would leave a stray "website/" segment in the URL.
    docid = filepath[len(website) :]
    return "https://aider.chat/" + docid


def get_index():
    """Load the vector index from ``storage/``, building and persisting it if absent.

    When the ``storage`` directory does not exist, every markdown file found
    by ``walk_subdirs_for_files("website")`` is split into nodes with
    ``MarkdownNodeParser`` and indexed; each node's metadata carries the
    source filename, extension and documentation URL.
    """
    dname = Path("storage")
    if dname.exists():
        storage_context = StorageContext.from_defaults(
            persist_dir=dname,
        )
        index = load_index_from_storage(storage_context)
    else:
        parser = MarkdownNodeParser()

        nodes = []
        for fname in walk_subdirs_for_files("website"):
            dump(fname)
            # doc = FlatReader().load_data(Path(fname))
            path = Path(fname)
            doc = Document(
                text=path.read_text(encoding="utf-8"),
                metadata=dict(
                    filename=path.name,
                    extension=path.suffix,
                    # as_posix() keeps "/" separators on every platform so
                    # the "website/" prefix check in fname_to_url holds.
                    url=fname_to_url(path.as_posix()),
                ),
            )
            nodes += parser.get_nodes_from_documents([doc])

        index = VectorStoreIndex(nodes)
        index.storage_context.persist(dname)

    return index


when = time.time()

index = get_index()

print("get_index", time.time() - when)
when = time.time()

retriever = index.as_retriever(similarity_top_k=20)

# question = "how can i convert a python script to js"
# question = "i am getting an error message about unknown context window"
# question = "i am getting an error message about exhausted context window"
# question = "The chat session is larger than the context window!"
# question = "how do i add deepseek api key to yaml"
question = (
    "It would be great if I could give aider an example github PR and instruct it to do the same"
    " exact thing for another integration."
)

nodes = retriever.retrieve(question)

print("retrieve", time.time() - when)
when = time.time()

dump(len(nodes))

# Wrap each retrieved chunk in a <doc> element carrying its source URL, so the
# model can cite the relevant documentation pages as the system prompt asks.
doc_chunks = []
for node in nodes:
    url = node.metadata.get("url", "")
    url_attr = f' from_url="{url}"' if url else ""
    doc_chunks.append(f"<doc{url_attr}>\n{node.text}\n</doc>\n\n")

context = "".join(doc_chunks)

# dump(context)

res = execute(question, context)
content = res.choices[0].message.content
dump(content)

print("llm", time.time() - when)
when = time.time()