aider/scripts/clean_metadata.py

88 lines
2.9 KiB
Python

#!/usr/bin/env python
import json
import difflib
from pathlib import Path
import json5
def _load_metadata(path, loader):
    """Load a model-metadata mapping from *path* using *loader*.

    Args:
        path: Location of the metadata file.
        loader: Callable that parses an open text file, e.g. ``json.load``
            or ``json5.load``.

    Returns:
        The parsed top-level dict, or ``None`` after printing an error when
        the file cannot be read, cannot be parsed, or does not hold a JSON
        object at the top level.
    """
    try:
        # Explicit encoding so the result does not depend on the locale.
        with open(path, "r", encoding="utf-8") as f:
            data = loader(f)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading {path}: {e}")
        return None
    except ValueError as e:
        # json.JSONDecodeError is a ValueError subclass, and json5 signals
        # parse failures with a plain ValueError, so this one handler covers
        # both parsers.
        print(f"Error decoding JSON from {path}: {e}")
        return None
    except Exception as e:
        # Best-effort script: report anything unexpected instead of crashing.
        print(f"Error reading {path}: {e}")
        return None

    if not isinstance(data, dict):
        # The comparison below needs key -> entry mappings.
        print(f"Error: expected a JSON object at the top level of {path}")
        return None
    return data


def _print_entry_diff(key, litellm_entry, aider_entry):
    """Print a full-context unified diff between two entries for *key*."""
    print(f"--- {key} (litellm) ---")
    print(f"+++ {key} (aider) +++")

    # Serialize with sorted keys so ordering differences are not reported.
    litellm_json = json.dumps(litellm_entry, indent=4, sort_keys=True).splitlines()
    aider_json = json.dumps(aider_entry, indent=4, sort_keys=True).splitlines()

    diff = difflib.unified_diff(
        litellm_json,
        aider_json,
        fromfile=f"{key} (litellm)",
        tofile=f"{key} (aider)",
        lineterm="",
        # Context as large as the longer side, so every line is shown.
        n=max(len(litellm_json), len(aider_json)),
    )
    # Drop the two header lines from unified_diff; ours were printed above.
    diff_lines = list(diff)[2:]
    if diff_lines:
        for line in diff_lines:
            print(line)
    else:
        print("(No differences found)")
    print("\n" + "=" * 40 + "\n")


def main():
    """Compare model entries shared by the LiteLLM and aider metadata files.

    Both files are located relative to this script. For every model key
    present in both, a unified diff of the two entries is printed. Problems
    (missing file, unreadable or malformed content) are reported on stdout
    and end the run early.

    Returns:
        None.
    """
    script_dir = Path(__file__).parent.resolve()
    litellm_path = script_dir / "../../litellm/model_prices_and_context_window.json"
    aider_path = script_dir / "../aider/resources/model-metadata.json"

    if not litellm_path.exists():
        print(f"Error: LiteLLM metadata file not found at {litellm_path}")
        return
    if not aider_path.exists():
        print(f"Error: Aider metadata file not found at {aider_path}")
        return

    litellm_data = _load_metadata(litellm_path, json.load)
    if litellm_data is None:
        return
    # Use json5 for the aider metadata file as it might contain comments.
    aider_data = _load_metadata(aider_path, json5.load)
    if aider_data is None:
        return

    common_keys = sorted(litellm_data.keys() & aider_data.keys())
    if not common_keys:
        print("No common models found between the two files.")
        return

    print("Comparing common models found in both files:\n")
    for key in common_keys:
        _print_entry_diff(key, litellm_data[key], aider_data[key])
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()