Mirror of https://github.com/Aider-AI/aider.git, synced 2025-05-25 14:55:00 +00:00
Implemented a more efficient file filtering mechanism for the website content indexing process.
commit e3877b9855
parent e951974c43
1 changed file with 16 additions and 14 deletions
@@ -12,23 +12,25 @@ from aider.dump import dump # noqa: F401
 
 warnings.simplefilter("ignore", category=FutureWarning)
 
+exclude_website_pats = [
+    "examples/**",
+    "_posts/**",
+    "HISTORY.md",
+    "docs/benchmarks*md",
+    "docs/ctags.md",
+    "docs/unified-diffs.md",
+    "docs/leaderboards/index.md",
+    "assets/**",
+]
+
 
 def get_package_files():
     for path in importlib_resources.files("website").iterdir():
-        dump(path)
-        if path.is_file() and path.name.endswith(".md"):
-            if not any(
-                part.startswith(("OLD", "tmp")) or part in ("examples", "_posts")
-                for part in path.parts
-            ):
-                yield str(path)
+        if path.is_file():
+            yield path
         elif path.is_dir():
             for subpath in path.rglob("*.md"):
-                if not any(
-                    part.startswith(("OLD", "tmp")) or part in ("examples", "_posts")
-                    for part in subpath.parts
-                ):
-                    yield str(subpath)
+                yield subpath
 
 
 def fname_to_url(filepath):
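Two things change in this hunk: the hard-coded OLD/tmp/examples/_posts checks give way to the declarative exclude_website_pats list above, and the generator now yields path objects (yield path, yield subpath) instead of strings. Note also that path.rglob() is a pathlib method, not part of the importlib Traversable protocol, so the code relies on files("website") resolving to an on-disk directory. A minimal standalone restatement of the new traversal, under that same assumption:

    import importlib.resources as importlib_resources  # the diffed module may use the importlib_resources backport instead

    # Restatement of the new get_package_files() walk, assuming "website"
    # is an importable on-disk package so files() returns a pathlib.Path.
    root = importlib_resources.files("website")
    for path in root.iterdir():
        if path.is_file():
            print(path)  # every top-level file is yielded as-is
        elif path.is_dir():
            for subpath in path.rglob("*.md"):  # only markdown below top level
                print(subpath)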
@@ -74,7 +76,7 @@ def get_index():
     nodes = []
     for fname in tqdm(list(get_package_files())):
         fname = Path(fname)
-        dump(fname)
+        # todo: skip if matches exclude website pats
         doc = Document(
             text=importlib_resources.files("website").joinpath(fname).read_text(),
             metadata=dict(
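The dropped dump(fname) debug call becomes a todo: files matching exclude_website_pats should eventually be skipped at this point in the loop. The commit leaves that unimplemented. A minimal sketch of one way to honor the patterns, assuming POSIX-style paths relative to the website root and plain fnmatch semantics (where both * and ** match across directory separators):

    from fnmatch import fnmatch

    # Hypothetical helper for the todo above; not part of this commit.
    # relpath is assumed POSIX-style and relative to the website root,
    # e.g. "docs/ctags.md" or "examples/2024/chat.md".
    def is_excluded(relpath, patterns=exclude_website_pats):
        return any(fnmatch(relpath, pat) for pat in patterns)

    # is_excluded("examples/chat.md") -> True   (matches "examples/**")
    # is_excluded("HISTORY.md")       -> True   (exact entry)
    # is_excluded("docs/usage.md")    -> False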
@@ -85,7 +87,7 @@ def get_index():
         )
         nodes += parser.get_nodes_from_documents([doc])
 
-    index = VectorStoreIndex(nodes)
+    index = VectorStoreIndex(nodes, show_progress=True)
     dname.parent.mkdir(exist_ok=True)
     index.storage_context.persist(dname)
 
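The last hunk only adds show_progress=True, so llama_index displays a progress bar while it embeds the nodes before the index is persisted. For completeness, a hedged sketch of loading the persisted index back, assuming the llama_index >= 0.10 import layout (older releases export the same names from the top-level llama_index package) and that dname is the persist directory used above:

    from llama_index.core import StorageContext, load_index_from_storage

    # Hypothetical round trip for the persisted index; not part of this commit.
    storage_context = StorageContext.from_defaults(persist_dir=str(dname))
    index = load_index_from_storage(storage_context)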