diff --git a/aider/models.py b/aider/models.py
index 9a1dccc9f..c8dfa3ea1 100644
--- a/aider/models.py
+++ b/aider/models.py
@@ -771,7 +771,9 @@ MODEL_SETTINGS = [
         editor_model_name="openrouter/openai/gpt-4o",
         editor_edit_format="editor-diff",
         use_repo_map=True,
+        streaming=False,
         use_temperature=False,
+        extra_params=dict(reasoning_effort="high"),
     ),
     ModelSettings(
         "openrouter/qwen/qwen-2.5-coder-32b-instruct",
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 6b7417c5d..11541c3c5 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -644,10 +644,12 @@ def run_test_real(
     if results_fname.exists():
         try:
             res = json.loads(results_fname.read_text())
-            return res
+            if res.get("test_timeouts", 0) > 0:
+                print(f"{results_fname} test timeouts, redoing...")
+            else:
+                return res
         except JSONDecodeError:
-            print(f"{results_fname} failed to parse, skipping")
-            return
+            print(f"{results_fname} failed to parse, redoing...")
 
     # Read solution and test files from config
     fnames = []
@@ -800,8 +802,11 @@ def run_test_real(
         try:
             errors = run_unit_tests(original_dname, testdir, history_fname, test_files)
         except subprocess.TimeoutExpired:
-            errors = "Tests timed out!"
-            timeouts += 1
+            try:
+                errors = run_unit_tests(original_dname, testdir, history_fname, test_files)
+            except subprocess.TimeoutExpired:
+                errors = "Tests timed out!"
+                timeouts += 1
 
         if errors:
             test_outcomes.append(False)