feed markdown into summarizer

This commit is contained in:
Paul Gauthier 2023-07-22 09:58:57 -03:00
parent c8e8967d45
commit a2ce9c4403

View file

@@ -1,7 +1,6 @@
import argparse
import json
import markdown
import tiktoken
from aider import models, prompts
@@ -9,30 +8,6 @@ from aider.dump import dump # noqa: F401
from aider.sendchat import simple_send_with_retries
def main():
    """Print the text of each block element in a markdown file.

    The file named by the ``filename`` command-line argument is read and
    run through the Python-Markdown block parser. The text of every
    element in the resulting tree is printed, except for ``blockquote``
    elements and elements that carry no text of their own.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filename", help="Markdown file to parse")
    args = parser.parse_args()

    with open(args.filename, "r") as f:
        text = f.read()

    md = markdown.Markdown()
    # Markdown exposes no parse() method; the block parser builds the
    # ElementTree from a list of source lines.
    tree = md.parser.parseDocument(text.split("\n"))

    # ElementTree.getiterator() was removed in Python 3.9; iter() is the
    # supported replacement and walks the tree in the same document order.
    for element in tree.iter():
        # NOTE: only the blockquote element itself is skipped here; iter()
        # still visits the elements nested inside it.
        if element.tag == "blockquote":
            continue
        # Container elements often have no text of their own; skip them
        # instead of printing the literal string "None".
        if element.text is None:
            continue
        print(element.text)


if __name__ == "__main__":
    main()
class ChatSummary:
def __init__(self, model, max_tokens=1024):
self.tokenizer = tiktoken.encoding_for_model(model)
@@ -93,3 +68,41 @@ class ChatSummary:
summary = simple_send_with_retries(model=models.GPT35.name, messages=messages)
dump(summary)
return [dict(role="user", content=summary)]
def main():
    """Rebuild a chat from a markdown transcript and summarize its tail.

    The file named by the ``filename`` command-line argument is read and
    split into messages line by line:

    * lines starting with ``"# "`` or ``">"`` are ignored;
    * a line starting with ``"#### "`` becomes a ``user`` message, with
      that five-character prefix removed;
    * every other line is collected into the ``assistant`` message that
      sits between two user messages.

    The last 40 messages are handed to ``ChatSummary.summarize`` and the
    result is passed to ``dump``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filename", help="Markdown file to parse")
    args = parser.parse_args()

    with open(args.filename, "r") as f:
        text = f.read()

    messages = []
    assistant_lines = []

    def flush_assistant():
        # Turn the lines collected so far into a single assistant message.
        if assistant_lines:
            content = "".join(assistant_lines)
            messages.append(dict(role="assistant", content=content))
            assistant_lines.clear()

    # keepends=True preserves the original newlines inside message content.
    for line in text.splitlines(keepends=True):
        if line.startswith(("# ", ">")):
            continue

        if line.startswith("#### "):
            flush_assistant()
            messages.append(dict(role="user", content=line[5:]))
            continue

        assistant_lines.append(line)

    # Lines after the last "#### " heading would otherwise never be
    # flushed, dropping the final assistant reply from the summary input.
    flush_assistant()

    summarizer = ChatSummary(models.GPT35.name)
    summary = summarizer.summarize(messages[-40:])
    dump(summary)


if __name__ == "__main__":
    main()