From 34da3dd3d78bc92349e10413fe41efabaa2859df Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 18 Dec 2024 13:09:42 -0800
Subject: [PATCH] feat: Show percent of unsolved problems per language

---
 benchmark/problem_stats.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py
index e147fb4b8..d81f76bc1 100755
--- a/benchmark/problem_stats.py
+++ b/benchmark/problem_stats.py
@@ -186,17 +186,26 @@ def analyze_exercise_solutions(dirs=None, topn=None):
     hard_set = {ex for ex, models in exercise_solutions.items() if len(models) <= HARD_SET_NUM}
     print(f"Total hard set exercises: {len(hard_set)}")
 
-    # Count problems by language in hard set
-    lang_counts = defaultdict(int)
+    # Count total problems and hard set problems by language
+    lang_totals = defaultdict(int)
+    lang_hard_counts = defaultdict(int)
+
+    for exercise in all_exercises:
+        lang = exercise.split("/")[1]  # Get language from path
+        lang_totals[lang] += 1
+
     for exercise in hard_set:
         lang = exercise.split("/")[1]  # Get language from path
-        lang_counts[lang] += 1
+        lang_hard_counts[lang] += 1
 
     print("\nHard set problems by language:")
-    print(f"{'Language':<12} {'Count':>5}")
-    print("-" * 18)
-    for lang, count in sorted(lang_counts.items()):
-        print(f"{lang:<12} {count:>5}")
+    print(f"{'Language':<12} {'Count':>5} {'Percent':>8}")
+    print("-" * 28)
+    for lang in sorted(lang_totals.keys()):
+        count = lang_hard_counts[lang]
+        total = lang_totals[lang]
+        pct = (count / total) * 100
+        print(f"{lang:<12} {count:>5} {pct:>7.1f}%")
     print()
 
     # For each model, compute performance on hard set