diff --git a/assets/models-over-time.png b/assets/models-over-time.png deleted file mode 100644 index eaed94a53..000000000 Binary files a/assets/models-over-time.png and /dev/null differ diff --git a/assets/models-over-time.svg b/assets/models-over-time.svg new file mode 100644 index 000000000..994dab6ae --- /dev/null +++ b/assets/models-over-time.svg @@ -0,0 +1,1742 @@ + + + + + + + + 2024-05-15T11:52:34.512395 + image/svg+xml + + + Matplotlib v3.8.4, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark/over_time.py b/benchmark/over_time.py index 53cf05715..33e80e67e 100644 --- a/benchmark/over_time.py +++ b/benchmark/over_time.py @@ -1,11 +1,11 @@ import matplotlib.pyplot as plt import yaml -from datetime import datetime -from matplotlib import rc from imgcat import imgcat +from matplotlib import rc + def plot_over_time(yaml_file): - with open(yaml_file, 'r') as file: + with open(yaml_file, "r") as file: data = yaml.safe_load(file) dates = [] @@ -13,10 +13,10 @@ def plot_over_time(yaml_file): models = [] for entry in data: - if 'released' in entry and 'pass_rate_2' in entry: - dates.append(entry['released']) - pass_rates.append(entry['pass_rate_2']) - models.append(entry['model'].split('(')[0].strip()) + if "released" in entry and "pass_rate_2" in entry: + dates.append(entry["released"]) + pass_rates.append(entry["pass_rate_2"]) + models.append(entry["model"].split("(")[0].strip()) plt.rcParams["hatch.linewidth"] = 0.5 plt.rcParams["hatch.color"] = "#444444" @@ -26,22 +26,32 @@ def plot_over_time(yaml_file): fig, ax = plt.subplots(figsize=(10, 5)) ax.grid(axis="y", zorder=0, lw=0.2) for spine in ax.spines.values(): - spine.set_edgecolor('#DDDDDD') + spine.set_edgecolor("#DDDDDD") spine.set_linewidth(0.5) - colors = ['red' if 'gpt-4' in model else 'green' if 'gpt-3.5' in model else 'blue' for model in models] + colors = [ + "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "blue" for model in models + ] ax.scatter(dates, pass_rates, c=colors, alpha=0.5, s=120) for i, model in enumerate(models): - ax.annotate(model, (dates[i], pass_rates[i]), fontsize=12, alpha=0.75, - xytext=(5, 5), textcoords='offset points') + ax.annotate( + model, + (dates[i], pass_rates[i]), + fontsize=12, + alpha=0.75, + xytext=(5, 5), + textcoords="offset points", + ) - ax.set_xlabel('Model release date', fontsize=18) - ax.set_ylabel('Aider code editing benchmark,\npercent completed correctly', fontsize=18) - ax.set_title('LLM code editing skill by model release date', fontsize=20) + ax.set_xlabel("Model release date", fontsize=18) + ax.set_ylabel("Aider code editing benchmark,\npercent completed correctly", fontsize=18) + ax.set_title("LLM code editing skill by model release date", fontsize=20) plt.tight_layout() plt.savefig("tmp_over_time.png") + plt.savefig("tmp_over_time.svg") imgcat(fig) + # Example usage -plot_over_time('_data/edit_leaderboard.yml') +plot_over_time("_data/edit_leaderboard.yml") diff --git a/docs/leaderboards/index.md b/docs/leaderboards/index.md index 25dfc130e..d33cc6a54 100644 --- a/docs/leaderboards/index.md +++ b/docs/leaderboards/index.md @@ -187,7 +187,7 @@ Therefore, results are available for fewer models. ## LLM code editing skill by model release date -[![connecting to many LLMs](/assets/models-over-time.png)](https://aider.chat/assets/models-over-time.png) +[![connecting to many LLMs](/assets/models-over-time.svg)](https://aider.chat/assets/models-over-time.svg) ## Notes on benchmarking results