diff --git a/scripts/tsl_pack_langs.py b/scripts/tsl_pack_langs.py index 846c22801..9db702878 100755 --- a/scripts/tsl_pack_langs.py +++ b/scripts/tsl_pack_langs.py @@ -3,10 +3,42 @@ import json import os import sys +import time import requests +def get_default_branch(owner, repo): + """Get the default branch of a GitHub repository using the API.""" + api_url = f"https://api.github.com/repos/{owner}/{repo}" + try: + response = requests.get(api_url) + response.raise_for_status() + return response.json().get("default_branch") + except requests.exceptions.RequestException: + return None + + +def try_download_tags(owner, repo, branch, directory, output_path): + """Try to download tags.scm from a specific branch.""" + base_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}" + if directory: + tags_url = f"{base_url}/{directory}/queries/tags.scm" + else: + tags_url = f"{base_url}/queries/tags.scm" + + try: + response = requests.get(tags_url) + response.raise_for_status() + + # Save the file + with open(output_path, "w") as f: + f.write(response.text) + return True + except requests.exceptions.RequestException: + return False + + def main(): # Path to the language definitions file lang_def_path = "../../tmp/tree-sitter-language-pack/sources/language_definitions.json" @@ -17,6 +49,9 @@ def main(): # Create the output directory if it doesn't exist os.makedirs(output_dir, exist_ok=True) + # Common branch names to try if API fails and config branch doesn't work + common_branches = ["main", "master", "dev", "develop"] + try: # Load the language definitions with open(lang_def_path, "r") as f: @@ -26,18 +61,20 @@ def main(): sys.exit(1) print(f"Found {len(lang_defs)} language definitions") - + # Process each language + successes = 0 + total = len(lang_defs) + for lang, config in lang_defs.items(): print(f"Processing {lang}...") - # Extract repo URL and branch from the config + # Extract repo URL from the config repo_url = config.get("repo") if not repo_url: print(f"Skipping {lang}: No repository URL found") continue - branch = config.get("branch", "master") directory = config.get("directory", "") # Parse the GitHub repository URL @@ -46,35 +83,58 @@ def main(): continue # Extract the owner and repo name from the URL - _, _, _, owner, repo = repo_url.rstrip("/").split("/") - - # Construct the raw file URL - # Build the GitHub raw content path - base_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}" - if directory: - tags_url = f"{base_url}/{directory}/queries/tags.scm" - else: - tags_url = f"{base_url}/queries/tags.scm" + parts = repo_url.rstrip("/").split("/") + if len(parts) < 5: + print(f"Skipping {lang}: Invalid GitHub URL format") + continue + + owner = parts[-2] + repo = parts[-1] # Create the language directory in the output path lang_dir = os.path.join(output_dir, lang) - os.makedirs(os.path.join(lang_dir, "queries"), exist_ok=True) + queries_dir = os.path.join(lang_dir, "queries") + os.makedirs(queries_dir, exist_ok=True) + output_file = os.path.join(queries_dir, "tags.scm") - # Fetch the tags.scm file - try: - response = requests.get(tags_url) - response.raise_for_status() # Raise an exception for HTTP errors + # Try branches in this order: + # 1. Branch specified in the config + # 2. Default branch from GitHub API + # 3. Common branch names (main, master, etc.) + + branches_to_try = [] + + # 1. Branch from config (if specified) + config_branch = config.get("branch") + if config_branch: + branches_to_try.append(config_branch) + + # 2. Default branch from GitHub API + default_branch = get_default_branch(owner, repo) + if default_branch and default_branch not in branches_to_try: + branches_to_try.append(default_branch) + + # 3. Add common branch names + for branch in common_branches: + if branch not in branches_to_try: + branches_to_try.append(branch) - # Save the file - output_file = os.path.join(lang_dir, "queries", "tags.scm") - with open(output_file, "w") as f: - f.write(response.text) + # Try each branch + success = False + for branch in branches_to_try: + if try_download_tags(owner, repo, branch, directory, output_file): + print(f"Successfully downloaded tags for {lang} (branch: {branch})") + success = True + successes += 1 + break + + if not success: + print(f"Failed to download tags for {lang} after trying all branches") + + # Be nice to GitHub's API + time.sleep(0.1) - print(f"Successfully downloaded tags for {lang}") - except requests.exceptions.RequestException as e: - print(f"Error fetching tags for {lang}: {e}") - - print("All language tags processed") + print(f"All language tags processed. Downloaded {successes}/{total} successfully.") if __name__ == "__main__":