diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py index 68c2535e7..921cfd3f0 100755 --- a/benchmark/problem_stats.py +++ b/benchmark/problem_stats.py @@ -186,23 +186,21 @@ def analyze_exercise_solutions(dirs=None, topn=None): hard_set = {ex for ex, models in exercise_solutions.items() if len(models) <= HARD_SET_NUM} print(f"Total hard set exercises: {len(hard_set)}") - # Count total problems and hard set problems by language + # Count total problems and unsolved problems by language lang_totals = defaultdict(int) - lang_hard_counts = defaultdict(int) + lang_unsolved = defaultdict(int) for exercise in all_exercises: lang = exercise.split("/")[1] # Get language from path lang_totals[lang] += 1 + if not exercise_solutions[exercise]: # No models solved this exercise + lang_unsolved[lang] += 1 - for exercise in hard_set: - lang = exercise.split("/")[1] # Get language from path - lang_hard_counts[lang] += 1 - - print("\nHard set problems by language:") + print("\nUnsolved problems by language:") print(f"{'Language':<12} {'Count':>5} {'Percent':>8}") print("-" * 28) for lang in sorted(lang_totals.keys()): - count = lang_hard_counts[lang] + count = lang_unsolved[lang] total = lang_totals[lang] pct = (count / total) * 100 print(f"{lang:<12} {count:>5} {pct:>7.1f}%")