diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py index 1e9925559..c09c96745 100755 --- a/benchmark/problem_stats.py +++ b/benchmark/problem_stats.py @@ -181,13 +181,16 @@ def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False): ) print(f"Total exercises solved at least once: {solved_at_least_once}") - # print out these never solved use lang/exercises/practice/ex ai! print(f"Never solved by any model: {solved_by_none}") if solved_by_none > 0: print("\nExercises never solved by any model:") unsolved = [ex for ex, models in exercise_solutions.items() if not models] for ex in sorted(unsolved): - print(f" {ex}") + # Split into language and exercise parts + lang, exercise = ex.split('/') + # Reconstruct path in desired format + formatted_path = f"{lang}/exercises/practice/{exercise}" + print(f" {formatted_path}") print(f"\nSolved by all models: {solved_by_all}") print( f"Total exercises: {len(all_exercises)} = {solved_by_none} (none) + {solved_by_all} (all) +"