From 34da3dd3d78bc92349e10413fe41efabaa2859df Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)" <paul@aider.chat>
Date: Wed, 18 Dec 2024 13:09:42 -0800
Subject: [PATCH] feat: Show percent of unsolved problems per language

---
 benchmark/problem_stats.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py
index e147fb4b8..d81f76bc1 100755
--- a/benchmark/problem_stats.py
+++ b/benchmark/problem_stats.py
@@ -186,17 +186,26 @@ def analyze_exercise_solutions(dirs=None, topn=None):
     hard_set = {ex for ex, models in exercise_solutions.items() if len(models) <= HARD_SET_NUM}
     print(f"Total hard set exercises: {len(hard_set)}")
 
-    # Count problems by language in hard set
-    lang_counts = defaultdict(int)
+    # Count total problems and hard set problems by language
+    lang_totals = defaultdict(int)
+    lang_hard_counts = defaultdict(int)
+    
+    for exercise in all_exercises:
+        lang = exercise.split("/")[1]  # Get language from path
+        lang_totals[lang] += 1
+        
     for exercise in hard_set:
         lang = exercise.split("/")[1]  # Get language from path
-        lang_counts[lang] += 1
+        lang_hard_counts[lang] += 1
 
     print("\nHard set problems by language:")
-    print(f"{'Language':<12} {'Count':>5}")
-    print("-" * 18)
-    for lang, count in sorted(lang_counts.items()):
-        print(f"{lang:<12} {count:>5}")
+    print(f"{'Language':<12} {'Count':>5} {'Percent':>8}")
+    print("-" * 28)
+    for lang in sorted(lang_totals.keys()):
+        count = lang_hard_counts[lang]
+        total = lang_totals[lang]
+        pct = (count / total) * 100
+        print(f"{lang:<12} {count:>5} {pct:>7.1f}%")
     print()
 
     # For each model, compute performance on hard set