From 7875418183be830041cf439b4f4908839ec53eec Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Tue, 9 Apr 2024 18:11:08 -0700 Subject: [PATCH] fix column order --- assets/2024-04-09-gpt-4-turbo-laziness.svg | 346 ++++++++++----------- benchmark/benchmark.py | 16 +- 2 files changed, 186 insertions(+), 176 deletions(-) diff --git a/assets/2024-04-09-gpt-4-turbo-laziness.svg b/assets/2024-04-09-gpt-4-turbo-laziness.svg index 8e9377752..cdaa0b637 100644 --- a/assets/2024-04-09-gpt-4-turbo-laziness.svg +++ b/assets/2024-04-09-gpt-4-turbo-laziness.svg @@ -6,7 +6,7 @@ - 2024-04-09T16:51:38.404046 + 2024-04-09T18:10:47.008504 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -172,9 +172,20 @@ z - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -426,10 +340,96 @@ z + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -784,16 +784,16 @@ z +" clip-path="url(#p91d81d2cf0)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + @@ -807,11 +807,11 @@ L -3.5 0 +" clip-path="url(#p91d81d2cf0)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -826,11 +826,11 @@ L 420.58832 196.2748 +" clip-path="url(#p91d81d2cf0)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -845,11 +845,11 @@ L 420.58832 153.5661 +" clip-path="url(#p91d81d2cf0)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -864,11 +864,11 @@ L 420.58832 110.8574 +" clip-path="url(#p91d81d2cf0)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -924,11 +924,11 @@ z +" clip-path="url(#p91d81d2cf0)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1142,18 +1142,18 @@ z +" clip-path="url(#p91d81d2cf0)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p91d81d2cf0)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p91d81d2cf0)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> - - + + + - - + + - - - - - - - + + + + @@ -1514,7 +1512,7 @@ z - + diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index b09e82fe7..d55d77e57 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -126,8 +126,8 @@ def show_stats(dirnames, graphs): if graphs: # plot_timing(df) # plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg) - plot_outcomes_claude(df) - # plot_refactoring(df) + # plot_outcomes_claude(df) + plot_refactoring(df) def plot_timing(df): @@ -440,6 +440,12 @@ def plot_refactoring(df): for grouped in tries: zorder += 1 df = grouped.unstack() + + i, j = 0, 1 + temp = df.iloc[i].copy() + df.iloc[i], df.iloc[j] = df.iloc[j], temp + dump(df) + # df.sort_values(by=["model"], ascending=False, inplace=True) num_models, num_formats = df.shape @@ -507,10 +513,16 @@ def plot_refactoring(df): "gpt-4-turbo-2024-04-09": "gpt-4-turbo-\n2024-04-09\n(GPT-4 Turbo with Vision)", } model_labels = [] + for model in models: ml = model_map.get(model, model) model_labels.append(ml) + model_labels = [ + "gpt-4-\n1106-preview", + "gpt-4-\n0125-preview", + "gpt-4-turbo-\n2024-04-09\n(GPT-4 Turbo with Vision)", + ] ax.set_xticklabels(model_labels, rotation=0) ax.set_ylabel("Percent of exercises completed successfully")