diff --git a/benchmark/over_time.py b/benchmark/over_time.py index 884dfefc3..f04da3730 100644 --- a/benchmark/over_time.py +++ b/benchmark/over_time.py @@ -50,7 +50,10 @@ def plot_over_time(yaml_file): spine.set_linewidth(0.5) colors = [ - "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "blue" for model in models + "orange" if "-4o-" in model and "gpt-4o-mini" not in model + else "red" if "gpt-4" in model + else "green" if "gpt-3.5" in model + else "blue" for model in models ] ax.scatter(dates, pass_rates, c=colors, alpha=0.5, s=120)