diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py
index 68c2535e7..921cfd3f0 100755
--- a/benchmark/problem_stats.py
+++ b/benchmark/problem_stats.py
@@ -186,23 +186,21 @@ def analyze_exercise_solutions(dirs=None, topn=None):
     hard_set = {ex for ex, models in exercise_solutions.items() if len(models) <= HARD_SET_NUM}
     print(f"Total hard set exercises: {len(hard_set)}")
 
-    # Count total problems and hard set problems by language
+    # Count total problems and unsolved problems by language
     lang_totals = defaultdict(int)
-    lang_hard_counts = defaultdict(int)
+    lang_unsolved = defaultdict(int)
 
     for exercise in all_exercises:
         lang = exercise.split("/")[1]  # Get language from path
         lang_totals[lang] += 1
+        if not exercise_solutions[exercise]:  # No models solved this exercise
+            lang_unsolved[lang] += 1
 
-    for exercise in hard_set:
-        lang = exercise.split("/")[1]  # Get language from path
-        lang_hard_counts[lang] += 1
-
-    print("\nHard set problems by language:")
+    print("\nUnsolved problems by language:")
     print(f"{'Language':<12} {'Count':>5} {'Percent':>8}")
     print("-" * 28)
     for lang in sorted(lang_totals.keys()):
-        count = lang_hard_counts[lang]
+        count = lang_unsolved[lang]
         total = lang_totals[lang]
         pct = (count / total) * 100
         print(f"{lang:<12} {count:>5} {pct:>7.1f}%")