From ed0dc95a107b27fdf7b03dd396988d901901cb34 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Wed, 8 Nov 2023 11:16:56 -0800 Subject: [PATCH] copy --- assets/benchmarks-1106.svg | 337 +++++++++++++++++-------------- assets/benchmarks-speed-1106.svg | 297 +++++++++++++++------------ benchmark/benchmark.py | 4 +- 3 files changed, 356 insertions(+), 282 deletions(-) diff --git a/assets/benchmarks-1106.svg b/assets/benchmarks-1106.svg index e6b4fcb78..ab42481da 100644 --- a/assets/benchmarks-1106.svg +++ b/assets/benchmarks-1106.svg @@ -6,7 +6,7 @@ - 2023-11-08T10:28:45.418930 + 2023-11-08T11:16:45.721593 image/svg+xml @@ -32,8 +32,8 @@ z @@ -41,12 +41,12 @@ z - - + @@ -363,7 +363,7 @@ z - + @@ -428,7 +428,7 @@ z - + @@ -461,7 +461,7 @@ z - + @@ -507,7 +507,7 @@ z - + @@ -532,7 +532,7 @@ z - + @@ -819,16 +819,16 @@ z +" clip-path="url(#p2f2815f9ac)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + @@ -840,18 +840,18 @@ L -3.5 0 - + - + - + - + - + - + @@ -905,18 +905,18 @@ L 404.21745 153.82125 - + - + - + @@ -924,18 +924,18 @@ L 404.21745 111.111875 - + - + - + - + - + - + @@ -1004,7 +1004,7 @@ L 404.21745 25.693125 - + +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> @@ -1299,13 +1299,13 @@ L 404.21745 239.24 " style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/> - - + - + @@ -1370,7 +1370,7 @@ z - + @@ -1378,7 +1378,7 @@ z - + @@ -1386,7 +1386,7 @@ z - + @@ -1394,7 +1394,7 @@ z - + @@ -1402,7 +1402,7 @@ z - + @@ -1410,7 +1410,7 @@ z - + @@ -1418,7 +1418,7 @@ z - + @@ -1427,102 +1427,102 @@ z +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p2f2815f9ac)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> - - - + - + @@ -1579,7 +1579,7 @@ z - + @@ -1598,7 +1598,7 @@ z - + @@ -1614,17 +1614,17 @@ z - - - + - + @@ -1700,7 +1700,7 @@ z - + @@ -1717,7 +1717,7 @@ z - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + @@ -1872,16 +1905,16 @@ z - - + @@ -1889,16 +1922,16 @@ z - - + - + + diff --git a/assets/benchmarks-speed-1106.svg b/assets/benchmarks-speed-1106.svg index dc1ba67ea..c2f3b8c17 100644 --- a/assets/benchmarks-speed-1106.svg +++ b/assets/benchmarks-speed-1106.svg @@ -6,7 +6,7 @@ - 2023-11-08T10:52:40.272159 + 2023-11-08T11:16:14.597375 image/svg+xml @@ -32,8 +32,8 @@ z @@ -41,12 +41,12 @@ z - - + @@ -363,7 +363,7 @@ z - + @@ -428,7 +428,7 @@ z - + @@ -461,7 +461,7 @@ z - + @@ -507,7 +507,7 @@ z - + @@ -532,7 +532,7 @@ z - + @@ -819,16 +819,16 @@ z +" clip-path="url(#pfd3630de95)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + @@ -840,18 +840,18 @@ L -3.5 0 - + - + - + - + - + - + @@ -905,18 +905,18 @@ L 404.21745 189.266612 - + - + - + - + - + - + @@ -960,18 +960,18 @@ L 404.21745 139.293224 - + - + - + @@ -980,18 +980,18 @@ L 404.21745 114.306529 - + - + - + @@ -1000,18 +1000,18 @@ L 404.21745 89.319835 - + - + - + @@ -1020,18 +1020,18 @@ L 404.21745 64.333141 - + - + - + @@ -1040,7 +1040,7 @@ L 404.21745 39.346447 - + - + +" clip-path="url(#pfd3630de95)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pfd3630de95)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> @@ -1348,13 +1348,13 @@ L 404.21745 239.24 " style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/> - - + @@ -1364,7 +1364,7 @@ L 404.21745 25.693125 - + @@ -1374,7 +1374,7 @@ L 404.21745 25.693125 - + - + @@ -1425,8 +1425,8 @@ z - - + + - - - - - - + + + + + - + @@ -1488,7 +1487,7 @@ z - + @@ -1497,7 +1496,7 @@ z - + @@ -1507,7 +1506,7 @@ z - + @@ -1516,7 +1515,7 @@ z - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + - - + - - + - + + diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index bb5bff0aa..28910f5c9 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -167,7 +167,7 @@ def plot_timing(df): ax.set_xticklabels(models) ax.set_ylabel("Average GPT response time\nper exercise (sec)") - ax.set_title("GPT Code Editing Speed") + ax.set_title("GPT Code Editing Speed\n(time per coding task)") ax.legend( title="Edit Format", loc="upper left", @@ -266,7 +266,7 @@ def plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg): ax.set_ylabel("Percent of exercises completed successfully") # ax.set_xlabel("Model") - ax.set_title("GPT Code Editing Skill") + ax.set_title("GPT Code Editing Skill\n(percent coding tasks correct)") ax.legend( title="Edit Format", loc="upper left",