diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py
index 1e9925559..c09c96745 100755
--- a/benchmark/problem_stats.py
+++ b/benchmark/problem_stats.py
@@ -181,13 +181,16 @@ def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):
     )
 
     print(f"Total exercises solved at least once: {solved_at_least_once}")
-    # print out these never solved use lang/exercises/practice/ex ai!
     print(f"Never solved by any model: {solved_by_none}")
     if solved_by_none > 0:
         print("\nExercises never solved by any model:")
         unsolved = [ex for ex, models in exercise_solutions.items() if not models]
         for ex in sorted(unsolved):
-            print(f"  {ex}")
+            # Split into language and exercise parts
+            lang, exercise = ex.split('/')
+            # Reconstruct path in desired format
+            formatted_path = f"{lang}/exercises/practice/{exercise}"
+            print(f"  {formatted_path}")
     print(f"\nSolved by all models: {solved_by_all}")
     print(
         f"Total exercises: {len(all_exercises)} = {solved_by_none} (none) + {solved_by_all} (all) +"