aider/scripts/clean_metadata.py
2025-04-20 11:03:41 -07:00

124 lines
4.5 KiB
Python

#!/usr/bin/env python
import difflib
import json
from pathlib import Path
import json5
def _load_metadata(path, loader, parse_errors):
    """Load one metadata file and return its top-level mapping.

    Args:
        path: Location of the file to read.
        loader: Callable taking an open text file and returning the parsed
            data (``json.load`` or ``json5.load``).
        parse_errors: Exception class (or tuple of classes) that ``loader``
            raises for malformed content.

    Returns:
        The parsed dict, or ``None`` after printing an error message when the
        file cannot be read, cannot be parsed, or is not a JSON object.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, "r", encoding="utf-8") as f:
            data = loader(f)
    except parse_errors as e:
        print(f"Error decoding JSON from {path}: {e}")
        return None
    except Exception as e:
        print(f"Error reading {path}: {e}")
        return None

    # The comparison below relies on dict keys; reject lists, strings, etc.
    if not isinstance(data, dict):
        print(f"Error: expected a JSON object at the top level of {path}")
        return None
    return data


def _print_entry_diff(key, aider_entry, litellm_entry):
    """Print a full-context unified diff of one model's aider vs litellm entry."""
    print(f"--- {key} (aider) ---")
    print(f"+++ {key} (litellm) +++")

    # Compare canonical (sorted, indented) JSON renderings so key order in the
    # source files does not show up as a difference.
    aider_json = json.dumps(aider_entry, indent=4, sort_keys=True).splitlines()
    litellm_json = json.dumps(litellm_entry, indent=4, sort_keys=True).splitlines()

    diff = difflib.unified_diff(
        aider_json,
        litellm_json,
        fromfile=f"{key} (aider)",
        tofile=f"{key} (litellm)",
        lineterm="",
        n=max(len(litellm_json), len(aider_json)),  # Show all lines
    )

    # Drop the two "---"/"+++" header lines emitted by unified_diff; equivalent
    # headers were already printed above.
    diff_lines = list(diff)[2:]
    if diff_lines:
        for line in diff_lines:
            print(line)
    else:
        print("(No differences found)")
    print("\n" + "=" * 40)


def _write_metadata(path, data):
    """Write ``data`` to ``path`` as sorted, 4-space-indented JSON plus a newline.

    The text is serialized before the file is opened so that a serialization
    error cannot leave a truncated file behind.
    """
    text = json.dumps(data, indent=4, sort_keys=True) + "\n"
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)


def main():
    """Interactively prune aider model metadata that litellm also provides.

    Loads litellm's ``model_prices_and_context_window.json`` and aider's
    ``model-metadata.json`` (paths are resolved relative to this script), then
    for every model key present in both files prints a diff of the two entries
    and asks whether the aider entry should be removed. Each confirmed removal
    is written back to the aider file immediately.

    NOTE: the aider file is parsed with json5 but rewritten with the standard
    ``json`` module, so any comments in it are not preserved once an entry has
    been removed.

    Returns:
        None. Problems are reported on stdout rather than raised.
    """
    script_dir = Path(__file__).parent.resolve()
    # Adjust path relative to the script's location in the aider repo
    litellm_path = script_dir.parent / "../litellm/model_prices_and_context_window.json"
    aider_path = script_dir / "../aider/resources/model-metadata.json"

    if not litellm_path.exists():
        print(f"Error: LiteLLM metadata file not found at {litellm_path}")
        return
    if not aider_path.exists():
        print(f"Error: Aider metadata file not found at {aider_path}")
        return

    litellm_data = _load_metadata(litellm_path, json.load, json.JSONDecodeError)
    if litellm_data is None:
        return

    # Use json5 for the aider metadata file as it might contain comments.
    # json5 signals malformed input with ValueError, not json.JSONDecodeError.
    aider_data = _load_metadata(aider_path, json5.load, ValueError)
    if aider_data is None:
        return

    common_keys = sorted(litellm_data.keys() & aider_data.keys())
    if not common_keys:
        print("No common models found between the two files.")
        return  # Exit if no common keys

    removed_count = 0
    print("Comparing common models found in both files:\n")
    for key in common_keys:
        _print_entry_diff(key, aider_data[key], litellm_data[key])

        # Ask user if they want to remove the entry from aider's metadata
        prompt = f"Remove '{key}' from aider/resources/model-metadata.json? (y/N): "
        try:
            response = input(prompt).strip().lower()
        except EOFError:
            # stdin is closed (e.g. non-interactive run): nothing more can be
            # confirmed, so keep everything that is left.
            print("\nNo more input available; keeping the remaining entries.")
            break

        if response != "y":
            print(f"Keeping '{key}'.")
        else:
            print(f"Removing '{key}' from aider data...")
            removed_entry = aider_data.pop(key)
            # Write the modified data back immediately
            try:
                _write_metadata(aider_path, aider_data)
            except Exception as e:
                # Restore the entry so the in-memory data and the final count
                # do not claim a removal that never reached the file.
                aider_data[key] = removed_entry
                print(f"Error writing updated data to {aider_path} after removing {key}: {e}")
            else:
                removed_count += 1
                print(f"Successfully removed '{key}' and updated {aider_path}.")
        print("-" * 40 + "\n")  # Separator for the next model

    # Final summary message
    if removed_count > 0:
        print(f"\nFinished comparing. A total of {removed_count} entr(y/ies) were removed.")
    else:
        print("\nFinished comparing. No entries were removed.")
# Script entry point: start the interactive comparison only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()