mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-20 20:35:00 +00:00
feat: Improve language tag download script with multi-branch support
This commit is contained in:
parent
849e02cbfb
commit
a6ebed8d16
1 changed file with 86 additions and 26 deletions
|
@ -3,10 +3,42 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_branch(owner, repo, timeout=10):
    """Get the default branch of a GitHub repository using the API.

    Args:
        owner: GitHub account or organization that owns the repository.
        repo: Repository name.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The value of the ``default_branch`` field in the API response, or
        ``None`` if the request fails, returns an HTTP error status, or the
        response body cannot be decoded as JSON.
    """
    api_url = f"https://api.github.com/repos/{owner}/{repo}"
    try:
        # Without an explicit timeout, requests can wait indefinitely on a
        # stalled connection and hang the whole download run.
        response = requests.get(api_url, timeout=timeout)
        response.raise_for_status()
        return response.json().get("default_branch")
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers an undecodable JSON body on requests versions
        # where the JSON decode error is not a RequestException subclass.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def try_download_tags(owner, repo, branch, directory, output_path, timeout=10):
    """Try to download tags.scm from a specific branch.

    Args:
        owner: GitHub account or organization that owns the repository.
        repo: Repository name.
        branch: Branch name to fetch the file from.
        directory: Optional sub-directory inside the repository that contains
            the ``queries`` folder; a falsy value means the repository root.
        output_path: Local file path the downloaded text is written to.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``True`` if the file was fetched and written to ``output_path``,
        ``False`` if the HTTP request failed or returned an error status.
    """
    base_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}"
    if directory:
        tags_url = f"{base_url}/{directory}/queries/tags.scm"
    else:
        tags_url = f"{base_url}/queries/tags.scm"

    try:
        # Without an explicit timeout, requests can wait indefinitely on a
        # stalled connection and hang the whole download run.
        response = requests.get(tags_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return False

    # Save the file. The encoding is pinned so the result does not depend on
    # the platform's locale default when the query text is non-ASCII.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(response.text)
    return True
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# Path to the language definitions file
|
# Path to the language definitions file
|
||||||
lang_def_path = "../../tmp/tree-sitter-language-pack/sources/language_definitions.json"
|
lang_def_path = "../../tmp/tree-sitter-language-pack/sources/language_definitions.json"
|
||||||
|
@ -17,6 +49,9 @@ def main():
|
||||||
# Create the output directory if it doesn't exist
|
# Create the output directory if it doesn't exist
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Common branch names to try if API fails and config branch doesn't work
|
||||||
|
common_branches = ["main", "master", "dev", "develop"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Load the language definitions
|
# Load the language definitions
|
||||||
with open(lang_def_path, "r") as f:
|
with open(lang_def_path, "r") as f:
|
||||||
|
@ -28,16 +63,18 @@ def main():
|
||||||
print(f"Found {len(lang_defs)} language definitions")
|
print(f"Found {len(lang_defs)} language definitions")
|
||||||
|
|
||||||
# Process each language
|
# Process each language
|
||||||
|
successes = 0
|
||||||
|
total = len(lang_defs)
|
||||||
|
|
||||||
for lang, config in lang_defs.items():
|
for lang, config in lang_defs.items():
|
||||||
print(f"Processing {lang}...")
|
print(f"Processing {lang}...")
|
||||||
|
|
||||||
# Extract repo URL and branch from the config
|
# Extract repo URL from the config
|
||||||
repo_url = config.get("repo")
|
repo_url = config.get("repo")
|
||||||
if not repo_url:
|
if not repo_url:
|
||||||
print(f"Skipping {lang}: No repository URL found")
|
print(f"Skipping {lang}: No repository URL found")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
branch = config.get("branch", "master")
|
|
||||||
directory = config.get("directory", "")
|
directory = config.get("directory", "")
|
||||||
|
|
||||||
# Parse the GitHub repository URL
|
# Parse the GitHub repository URL
|
||||||
|
@ -46,35 +83,58 @@ def main():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Extract the owner and repo name from the URL
|
# Extract the owner and repo name from the URL
|
||||||
_, _, _, owner, repo = repo_url.rstrip("/").split("/")
|
parts = repo_url.rstrip("/").split("/")
|
||||||
|
if len(parts) < 5:
|
||||||
|
print(f"Skipping {lang}: Invalid GitHub URL format")
|
||||||
|
continue
|
||||||
|
|
||||||
# Construct the raw file URL
|
owner = parts[-2]
|
||||||
# Build the GitHub raw content path
|
repo = parts[-1]
|
||||||
base_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}"
|
|
||||||
if directory:
|
|
||||||
tags_url = f"{base_url}/{directory}/queries/tags.scm"
|
|
||||||
else:
|
|
||||||
tags_url = f"{base_url}/queries/tags.scm"
|
|
||||||
|
|
||||||
# Create the language directory in the output path
|
# Create the language directory in the output path
|
||||||
lang_dir = os.path.join(output_dir, lang)
|
lang_dir = os.path.join(output_dir, lang)
|
||||||
os.makedirs(os.path.join(lang_dir, "queries"), exist_ok=True)
|
queries_dir = os.path.join(lang_dir, "queries")
|
||||||
|
os.makedirs(queries_dir, exist_ok=True)
|
||||||
|
output_file = os.path.join(queries_dir, "tags.scm")
|
||||||
|
|
||||||
# Fetch the tags.scm file
|
# Try branches in this order:
|
||||||
try:
|
# 1. Branch specified in the config
|
||||||
response = requests.get(tags_url)
|
# 2. Default branch from GitHub API
|
||||||
response.raise_for_status() # Raise an exception for HTTP errors
|
# 3. Common branch names (main, master, etc.)
|
||||||
|
|
||||||
# Save the file
|
branches_to_try = []
|
||||||
output_file = os.path.join(lang_dir, "queries", "tags.scm")
|
|
||||||
with open(output_file, "w") as f:
|
|
||||||
f.write(response.text)
|
|
||||||
|
|
||||||
print(f"Successfully downloaded tags for {lang}")
|
# 1. Branch from config (if specified)
|
||||||
except requests.exceptions.RequestException as e:
|
config_branch = config.get("branch")
|
||||||
print(f"Error fetching tags for {lang}: {e}")
|
if config_branch:
|
||||||
|
branches_to_try.append(config_branch)
|
||||||
|
|
||||||
print("All language tags processed")
|
# 2. Default branch from GitHub API
|
||||||
|
default_branch = get_default_branch(owner, repo)
|
||||||
|
if default_branch and default_branch not in branches_to_try:
|
||||||
|
branches_to_try.append(default_branch)
|
||||||
|
|
||||||
|
# 3. Add common branch names
|
||||||
|
for branch in common_branches:
|
||||||
|
if branch not in branches_to_try:
|
||||||
|
branches_to_try.append(branch)
|
||||||
|
|
||||||
|
# Try each branch
|
||||||
|
success = False
|
||||||
|
for branch in branches_to_try:
|
||||||
|
if try_download_tags(owner, repo, branch, directory, output_file):
|
||||||
|
print(f"Successfully downloaded tags for {lang} (branch: {branch})")
|
||||||
|
success = True
|
||||||
|
successes += 1
|
||||||
|
break
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
print(f"Failed to download tags for {lang} after trying all branches")
|
||||||
|
|
||||||
|
# Be nice to GitHub's API
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
print(f"All language tags processed. Downloaded {successes}/{total} successfully.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue