diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index aa4bb8132..397262824 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -549,15 +549,15 @@ def run_test_real( chat_history_file=history_fname, ) - ask_model = models.Model("gpt-4o") - # ask_model = models.Model("openrouter/anthropic/claude-3.5-sonnet") + # ask_model = models.Model("gpt-4o") + ask_model = models.Model("openrouter/anthropic/claude-3.5-sonnet") # ask_model = models.Model("openrouter/deepseek/deepseek-coder") # whole_model = models.Model("gpt-4o") # whole_model = models.Model("openrouter/anthropic/claude-3.5-sonnet") # whole_model = models.Model("openrouter/deepseek/deepseek-coder") - # whole_model = models.Model("openrouter/anthropic/claude-3-haiku-20240307") - whole_model = models.Model("gpt-4o-mini") + whole_model = models.Model("openrouter/anthropic/claude-3-haiku-20240307") + # whole_model = models.Model("gpt-4o-mini") main_model = ask_model edit_format = "ask-whole"