diff --git a/aider/models.py b/aider/models.py
index 9a1dccc9f..c8dfa3ea1 100644
--- a/aider/models.py
+++ b/aider/models.py
@@ -771,7 +771,9 @@ MODEL_SETTINGS = [
         editor_model_name="openrouter/openai/gpt-4o",
         editor_edit_format="editor-diff",
         use_repo_map=True,
+        streaming=False,
         use_temperature=False,
+        extra_params=dict(reasoning_effort="high"),
     ),
     ModelSettings(
         "openrouter/qwen/qwen-2.5-coder-32b-instruct",
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 6b7417c5d..11541c3c5 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -644,10 +644,12 @@ def run_test_real(
     if results_fname.exists():
         try:
             res = json.loads(results_fname.read_text())
-            return res
+            if res.get("test_timeouts", 0) > 0:
+                print(f"{results_fname} test timeouts, redoing...")
+            else:
+                return res
         except JSONDecodeError:
-            print(f"{results_fname} failed to parse, skipping")
-            return
+            print(f"{results_fname} failed to parse, redoing...")
 
     # Read solution and test files from config
     fnames = []
@@ -800,8 +802,11 @@ def run_test_real(
         try:
             errors = run_unit_tests(original_dname, testdir, history_fname, test_files)
         except subprocess.TimeoutExpired:
-            errors = "Tests timed out!"
-            timeouts += 1
+            try:
+                errors = run_unit_tests(original_dname, testdir, history_fname, test_files)
+            except subprocess.TimeoutExpired:
+                errors = "Tests timed out!"
+                timeouts += 1
 
         if errors:
             test_outcomes.append(False)