Mirror of https://github.com/Aider-AI/aider.git, synced 2025-05-28 16:25:00 +00:00
style: Run linter on benchmark script
parent 9cc674c283
commit 7bfc2e0e74

1 changed file with 21 additions and 21 deletions
@@ -1,9 +1,11 @@
 #!/usr/bin/env python

-import yaml
-from pathlib import Path
-from collections import defaultdict
 import json
+from collections import defaultdict
+from pathlib import Path
+
+import yaml
+

 def load_results(dirname):
     """Load all result files from a benchmark directory"""
@@ -11,7 +13,7 @@ def load_results(dirname):
     benchmark_dir = Path("tmp.benchmarks") / dirname
     if not benchmark_dir.exists():
         return None

     all_results = []
     for fname in benchmark_dir.glob("*/.aider.results.json"):
         try:
@@ -22,61 +24,59 @@ def load_results(dirname):
             continue
     return all_results


 def analyze_exercise_solutions():
     # Load the leaderboard data
     with open("aider/website/_data/edit_leaderboard.yml") as f:
         leaderboard = yaml.safe_load(f)

     # Track which models solved each exercise
     exercise_solutions = defaultdict(list)

     for entry in leaderboard:
         dirname = entry["dirname"]
         model = entry["model"]

         results = load_results(dirname)
         if not results:
             print(f"Could not load results for {dirname}")
             continue

         for result in results:
             testcase = result.get("testcase")
             if not testcase:
                 continue

             # Consider it solved if the last test attempt passed
             tests_outcomes = result.get("tests_outcomes", [])
             if tests_outcomes and tests_outcomes[-1]:
                 exercise_solutions[testcase].append(model)

     # Print statistics
     print("\nExercise Solution Statistics:")
     print("-" * 40)

     # Sort by number of models that solved each exercise
-    sorted_exercises = sorted(
-        exercise_solutions.items(),
-        key=lambda x: len(x[1]),
-        reverse=True
-    )
+    sorted_exercises = sorted(exercise_solutions.items(), key=lambda x: len(x[1]), reverse=True)

     for testcase, models in sorted_exercises:
         print(f"{testcase}: solved by {len(models)} models")
-        #print(f" Models: {', '.join(models)}")
+        # print(f" Models: {', '.join(models)}")

     print("\nSummary:")
     print(f"Total exercises solved at least once: {len(exercise_solutions)}")
     never_solved = 133 - len(exercise_solutions)
     print(f"Never solved by any model: {never_solved}")

     # Distribution of solutions
     solved_by_counts = defaultdict(int)
     for models in exercise_solutions.values():
         solved_by_counts[len(models)] += 1

     print("\nDistribution of solutions:")
     for count in sorted(solved_by_counts.keys()):
         print(f"Solved by {count} models: {solved_by_counts[count]} exercises")


 if __name__ == "__main__":
     analyze_exercise_solutions()
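
The hunks above are the kind of churn an import sorter plus a code formatter produce: the imports get regrouped, and the multi-line sorted() call is collapsed onto one line once it fits within the configured line length. Which tools this commit actually ran is not recorded on this page, so the sketch below is a rough illustration only; isort and black, the sample source string, and the 100-character line length are assumptions rather than values taken from the commit.

# Rough illustration, not part of the commit: reproduce this style of change
# programmatically. Tool choice (isort + black), the sample source, and the
# line length are assumptions.
import black
import isort

sample = (
    "import yaml\n"
    "from pathlib import Path\n"
    "from collections import defaultdict\n"
    "import json\n"
)

sorted_src = isort.code(sample)  # regroups imports: stdlib first, third-party after
formatted = black.format_str(sorted_src, mode=black.Mode(line_length=100))
print(formatted)

Run directly, this prints the imports regrouped into a standard-library block followed by a third-party block, mirroring the first hunk of the diff.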