diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 9f1ff2b4c..6b7417c5d 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -506,6 +506,7 @@ def summarize_results(dirname):
         percents[i] = pass_rate
         # console.print(f"{pass_rate:.1f}% correct after try {i+1}")
         setattr(res, f"pass_rate_{i + 1}", f"{pass_rate:.1f}")
+        setattr(res, f"pass_num_{i + 1}", passed_tests[i])
 
     print(f"- dirname: {dirname.name}")
     style = None if res.completed_tests in NUM_TESTS else "red"
@@ -521,6 +522,8 @@ def summarize_results(dirname):
 
     for i in range(tries):
         print(f"  pass_rate_{i + 1}: {percents[i]:.1f}")
+    for i in range(tries):
+        print(f"  pass_num_{i + 1}: {passed_tests[i]}")
 
     pct_well_formed = 1.0 - res.num_with_malformed_responses / res.completed_tests
     print(f"  percent_cases_well_formed: {pct_well_formed * 100:.1f}")
@@ -661,6 +664,9 @@ def run_test_real(
 
     test_files = config.get("files", {}).get("test", [])
 
+    ignore_files = {"Cargo.toml"}  # never treat the Rust manifest or test files as mentions
+    ignore_files.update(test_files)
+
     # Copy all solution files
     for file_path in solution_files:
         src = testdir / Path(file_path)
@@ -746,7 +752,7 @@ def run_test_real(
         # auto_lint=False,  # disabled for code-in-json experiments
         cache_prompts=True,
         suggest_shell_commands=False,
-        ignore_mentions=set(test_files),
+        ignore_mentions=ignore_files,
     )
     dump(coder.ignore_mentions)
 
@@ -852,7 +858,7 @@ def run_test_real(
 
 
 def run_unit_tests(original_dname, testdir, history_fname, test_files):
-    timeout = 60
+    timeout = 60 * 3
 
     # Map of file extensions to test commands
     TEST_COMMANDS = {
diff --git a/benchmark/problem_stats.py b/benchmark/problem_stats.py
index f69578629..ca4e48ed9 100755
--- a/benchmark/problem_stats.py
+++ b/benchmark/problem_stats.py
@@ -201,13 +201,13 @@ def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):
             lang_hard_set[lang] += 1
 
     print("\nUnsolved and hard set problems by language:")
-    print(f"{'Language':<12} {'Unsolved':>8} {'Hard Set':>9} {'Total':>7} {'Percent':>8}")
+    print(f"{'Language':<12} {'Unsolved':>8} {'Hard Set':>9} {'Total':>7} {'%hardUnsolved':>8}")
     print("-" * 47)
     for lang in sorted(lang_totals.keys()):
         count = lang_unsolved[lang]
         hard = lang_hard_set[lang]
         total = lang_totals[lang]
-        pct = (count / total) * 100
+        pct = (count / hard) * 100 if hard else 0.0  # empty hard set: avoid ZeroDivisionError
         print(f"{lang:<12} {count:>8} {hard:>9} {total:>7} {pct:>7.1f}%")
     print()