feat: Compare dicts directly to avoid spurious diffs in metadata cleaning

2025-05-20 04:14:59 +00:00 · 2025-04-20 11:08:53 -07:00 · 2025-04-20 11:08:53 -07:00 · d8caa76bc8
commit d8caa76bc8
parent 506c3c928e
1 changed files with 12 additions and 3 deletions
--- a/scripts/clean_metadata.py
+++ b/scripts/clean_metadata.py
@@ -173,10 +173,17 @@ def main():
            aider_entry = aider_data.get(key, {})

            # Convert dicts to formatted JSON strings for comparison
-            litellm_json = json.dumps(litellm_entry, indent=4, sort_keys=True).splitlines()
-            aider_json = json.dumps(aider_entry, indent=4, sort_keys=True).splitlines()
+            # First, compare the dictionaries directly for semantic equality
+            if litellm_entry == aider_entry:
+                print(f"'{key}': Entries are semantically identical.")
+                print("\n" + "=" * 40)
+                print("-" * 40 + "\n") # Separator for the next model
+                continue # Skip diff and removal prompt for identical entries

            # Generate unified diff
+            # If dictionaries differ, generate JSON strings to show the diff
+            litellm_json = json.dumps(litellm_entry, indent=4, sort_keys=True).splitlines()
+            aider_json = json.dumps(aider_entry, indent=4, sort_keys=True).splitlines()
            diff = difflib.unified_diff(
                aider_json,
                litellm_json,
@@ -189,7 +196,9 @@ def main():
            # Print the diff, skipping the header lines generated by unified_diff
            diff_lines = list(diff)[2:]
            if not diff_lines:
-                print("(No differences found)")
+                # Normally unreachable: entries that compare equal were skipped earlier.
+                # Kept as a fallback for dicts that differ yet serialize to identical JSON.
+                print("(No textual differences found, though dictionaries might differ in type/order)")
            else:
                for line in diff_lines:
                    # Add color for better readability (optional, requires a library