refactor: Sort all exercises by solve rate, not by language

This commit is contained in:
Paul Gauthier (aider) 2024-12-18 12:31:50 -08:00
parent 65133b2aef
commit 58812f7f1f

View file

@ -118,10 +118,12 @@ def analyze_exercise_solutions(dirs=None, topn=None):
if exercise not in exercise_solutions: if exercise not in exercise_solutions:
exercise_solutions[exercise] = [] exercise_solutions[exercise] = []
# Group exercises by language # Create list of (language, exercise) pairs with solution stats
by_language = defaultdict(list) exercise_stats = []
total_models = len(valid_entries)
for testcase in all_exercises: for testcase in all_exercises:
# Find language for this testcase from results # Find language for this testcase
lang = "unknown" lang = "unknown"
for r in next(iter(valid_entries))[1]: for r in next(iter(valid_entries))[1]:
try: try:
@ -130,26 +132,22 @@ def analyze_exercise_solutions(dirs=None, topn=None):
break break
except KeyError: except KeyError:
continue continue
by_language[lang].append(testcase)
# Sort languages models = exercise_solutions[testcase]
sorted_languages = sorted(by_language.keys())
# Calculate max lengths for alignment
max_name_len = max(len(testcase) for testcase in all_exercises)
total_models = len(valid_entries)
# Print exercises grouped by language
for lang in sorted_languages:
print(f"\n{lang.upper()}:")
lang_exercises = [(ex, exercise_solutions[ex]) for ex in by_language[lang]]
# Sort by number of models that solved each exercise
lang_exercises.sort(key=lambda x: len(x[1]), reverse=True)
for i, (testcase, models) in enumerate(lang_exercises, 1):
num_solved = len(models) num_solved = len(models)
percent = (num_solved / total_models) * 100 percent = (num_solved / total_models) * 100
print(f"{i:>3}. {testcase:<{max_name_len}} : {num_solved:>3} solved ({percent:>5.1f}%)") exercise_stats.append((lang, testcase, num_solved, percent))
# Sort all exercises by solve rate
exercise_stats.sort(key=lambda x: x[2], reverse=True)
# Calculate max lengths for alignment
max_name_len = max(len(f"{lang}/{ex}") for lang, ex, _, _ in exercise_stats)
# Print all exercises sorted by solve rate
print("\nAll Exercises (sorted by solve rate):")
for i, (lang, testcase, num_solved, percent) in enumerate(exercise_stats, 1):
print(f"{i:>3}. {lang}/{testcase:<{max_name_len}} : {num_solved:>3} solved ({percent:>5.1f}%)")
print("\nSummary:") print("\nSummary:")
solved_at_least_once = len([ex for ex, models in exercise_solutions.items() if models]) solved_at_least_once = len([ex for ex, models in exercise_solutions.items() if models])