From 6ddb8a7d88b384eece891d7e80d28bedf16a2b5b Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)" <paul@aider.chat>
Date: Wed, 18 Dec 2024 13:18:31 -0800
Subject: [PATCH] feat: Add hard set problem counts by language

---
 benchmark/problem_stats.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py
index fd9be348b..f69578629 100755
--- a/benchmark/problem_stats.py
+++ b/benchmark/problem_stats.py
@@ -187,24 +187,28 @@ def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):
     hard_set = {ex for ex, models in exercise_solutions.items() if len(models) <= HARD_SET_NUM}
     print(f"Total hard set exercises: {len(hard_set)}")
 
-    # Count total problems and unsolved problems by language
+    # Count total problems, unsolved problems, and hard set problems by language
     lang_totals = defaultdict(int)
     lang_unsolved = defaultdict(int)
+    lang_hard_set = defaultdict(int)
 
     for exercise in all_exercises:
         lang = exercise.split("/")[1]  # Get language from path
         lang_totals[lang] += 1
         if not exercise_solutions[exercise]:  # No models solved this exercise
             lang_unsolved[lang] += 1
+        if exercise in hard_set:  # Exercise is in the hard set
+            lang_hard_set[lang] += 1
 
-    print("\nUnsolved problems by language:")
-    print(f"{'Language':<12} {'Count':>5} {'Total':>7} {'Percent':>8}")
-    print("-" * 35)
+    print("\nUnsolved and hard set problems by language:")
+    print(f"{'Language':<12} {'Unsolved':>8} {'Hard Set':>9} {'Total':>7} {'Percent':>8}")
+    print("-" * 48)  # header width: 12 + 8 + 9 + 7 + 8 columns plus 4 separating spaces
     for lang in sorted(lang_totals.keys()):
         count = lang_unsolved[lang]
+        hard = lang_hard_set[lang]
         total = lang_totals[lang]
         pct = (count / total) * 100
-        print(f"{lang:<12} {count:>5} {total:>7} {pct:>7.1f}%")
+        print(f"{lang:<12} {count:>8} {hard:>9} {total:>7} {pct:>7.1f}%")
     print()
 
     # For each model, compute performance on hard set