Patch for benchmark/over_time.py: restyle the "model performance over time"
scatter plot and write it to a file.

What the hunks below do to plot_over_time():
  * set the hatch.linewidth / hatch.color rcParams and the default font via rc()
  * replace the implicit pyplot figure (10x6) with an explicit fig/ax pair (6x4)
  * draw only y-axis grid lines (lw=0.2) instead of plt.grid(True)
  * save the figure to tmp_over_time.svg and pass it to imgcat() instead of
    calling plt.show()

NOTE(review): the patch as first written called imgcat(fig) without importing
it, so the patched script would raise NameError. The line
"from imgcat import imgcat" was added to the first hunk and the hunk headers
were renumbered to account for the extra line.
NOTE(review): the post-image blob id on the "index" line predates that added
import and is therefore stale; regenerate the patch if a tool relies on it.
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation were lost. The indentation of the first two context lines of the
second hunk is assumed (12 spaces) -- confirm against the target file, or
apply with whitespace-insensitive matching.

diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 7083208ce..90504fecc 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -1,6 +1,8 @@
 import matplotlib.pyplot as plt
 import yaml
 from datetime import datetime
+from imgcat import imgcat
+from matplotlib import rc
 
 def plot_over_time(yaml_file):
     with open(yaml_file, 'r') as file:
@@ -16,19 +18,25 @@ def plot_over_time(yaml_file):
             pass_rates.append(entry['pass_rate_2'])
             models.append(entry['model'])
 
-    plt.figure(figsize=(10, 6))
+    plt.rcParams["hatch.linewidth"] = 0.5
+    plt.rcParams["hatch.color"] = "#444444"
+
+    rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"], "size": 10})
+
+    fig, ax = plt.subplots(figsize=(6, 4))
+    ax.grid(axis="y", zorder=0, lw=0.2)
     colors = ['red' if 'gpt-4' in model else 'green' if 'gpt-3.5' in model else 'blue' for model in models]
-    plt.scatter(dates, pass_rates, c=colors, alpha=0.5)
+    ax.scatter(dates, pass_rates, c=colors, alpha=0.5)
 
     for i, model in enumerate(models):
-        plt.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75)
+        ax.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75)
 
-    plt.xlabel('Release Date')
-    plt.ylabel('Pass Rate 2')
-    plt.title('Model Performance Over Time')
-    plt.grid(True)
+    ax.set_xlabel('Release Date')
+    ax.set_ylabel('Pass Rate 2')
+    ax.set_title('Model Performance Over Time')
     plt.tight_layout()
-    plt.show()
+    plt.savefig("tmp_over_time.svg")
+    imgcat(fig)
 
 # Example usage
 plot_over_time('_data/edit_leaderboard.yml')