diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 60e482c2e..7803be6ac 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -394,7 +394,8 @@ def run_unit_tests(testdir, history_fname): all_tests_passed = False print(f"Test {test_file} failed") - res = result.stdout + # remove timing info, to avoid randomizing the response to GPT + res = re.sub(r" in \d+\.\d+s", " in 1.0s", result.stdout) except subprocess.TimeoutExpired: all_tests_passed = False