"""Plot leaderboard pass rates against model release dates."""

from datetime import date, datetime

import matplotlib.pyplot as plt
import yaml


def _parse_release_date(value):
    """Return a leaderboard ``released`` value as a ``datetime``.

    PyYAML's ``safe_load`` turns unquoted ISO dates (``2023-06-13``) into
    ``datetime.date`` objects and leaves only quoted ones as strings, so both
    forms are accepted here.

    Raises:
        ValueError: If *value* is a string that is not in ``%Y-%m-%d`` form.
        TypeError: If *value* is neither a string nor a date/datetime.
    """
    # datetime is a subclass of date, so it has to be checked first.
    if isinstance(value, datetime):
        return value
    if isinstance(value, date):
        return datetime(value.year, value.month, value.day)
    return datetime.strptime(value, '%Y-%m-%d')


def plot_over_time(yaml_file):
    """Scatter-plot ``pass_rate_2`` against release date, labelled by model.

    Args:
        yaml_file: Path to a YAML file holding a list of mappings. Entries
            lacking either a ``released`` or a ``pass_rate_2`` key are
            skipped; every remaining entry must also have a ``model`` key.

    Raises:
        KeyError: If a plotted entry has no ``model`` key.
        ValueError: If a ``released`` string is not in ``%Y-%m-%d`` form.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # YAML streams are UTF-8 by default; do not depend on the locale encoding.
    with open(yaml_file, 'r', encoding='utf-8') as file:
        data = yaml.safe_load(file)

    dates = []
    pass_rates = []
    models = []

    # safe_load returns None for an empty document; treat that as no entries.
    for entry in data or []:
        if 'released' not in entry or 'pass_rate_2' not in entry:
            continue
        dates.append(_parse_release_date(entry['released']))
        pass_rates.append(entry['pass_rate_2'])
        models.append(entry['model'])

    plt.figure(figsize=(10, 6))
    plt.scatter(dates, pass_rates, c='blue', alpha=0.5)

    # Label every point with its model name.
    for model, released, pass_rate in zip(models, dates, pass_rates):
        plt.annotate(model, (released, pass_rate), fontsize=8, alpha=0.75)

    plt.xlabel('Release Date')
    plt.ylabel('Pass Rate 2')
    plt.title('Model Performance Over Time')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    # Example usage
    plot_over_time('_data/edit_leaderboard.yml')