diff --git a/benchmark/over_time.py b/benchmark/over_time.py index 2bec09469..1b4e0cd6c 100644 --- a/benchmark/over_time.py +++ b/benchmark/over_time.py @@ -23,18 +23,18 @@ def plot_over_time(yaml_file): rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"], "size": 10}) - fig, ax = plt.subplots(figsize=(6, 4)) + fig, ax = plt.subplots(figsize=(10, 4)) ax.grid(axis="y", zorder=0, lw=0.2) colors = ['red' if 'gpt-4' in model else 'green' if 'gpt-3.5' in model else 'blue' for model in models] ax.scatter(dates, pass_rates, c=colors, alpha=0.5) for i, model in enumerate(models): - ax.annotate(model, (dates[i], pass_rates[i]), fontsize=8, alpha=0.75, + ax.annotate(model, (dates[i], pass_rates[i]), fontsize=12, alpha=0.75, xytext=(5, 5), textcoords='offset points') - ax.set_xlabel('Release Date') - ax.set_ylabel('Pass Rate 2') - ax.set_title('Model Performance Over Time') + ax.set_xlabel('Model release date') + ax.set_ylabel('Aider code editing benchmark,\npercent completed correctly') + ax.set_title('LLM code editing skill by model release date') plt.tight_layout() plt.savefig("tmp_over_time.png") imgcat(fig)