diff --git a/assets/benchmarks.svg b/assets/benchmarks.svg index de267fc90..e578b8722 100644 --- a/assets/benchmarks.svg +++ b/assets/benchmarks.svg @@ -6,7 +6,7 @@ - 2023-07-01T16:22:55.676681 + 2023-07-01T16:38:56.085115 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -358,7 +358,7 @@ z - + @@ -420,7 +420,7 @@ z - + @@ -471,7 +471,7 @@ z - + @@ -514,7 +514,7 @@ z - + @@ -539,16 +539,16 @@ z +" clip-path="url(#pe888c5c299)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + @@ -562,11 +562,11 @@ L -3.5 0 +" clip-path="url(#pe888c5c299)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -608,11 +608,11 @@ z +" clip-path="url(#pe888c5c299)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -627,11 +627,11 @@ L 840.24 329.012528 +" clip-path="url(#pe888c5c299)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -646,11 +646,11 @@ L 840.24 235.68731 +" clip-path="url(#pe888c5c299)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -706,11 +706,11 @@ z +" clip-path="url(#pe888c5c299)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1060,7 +1060,7 @@ L 174.749508 515.662963 L 174.749508 375.32429 L 148.622182 375.32429 z -" clip-path="url(#pa23653cbc7)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 0.25; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#pe888c5c299)" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> + + + + - - + + - - + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + + - - - - - - + - + + + + + + + + + + + + + + + + + + + + + + + + - +" style="fill: #ffffff; opacity: 0.8; stroke: #cccccc; stroke-linejoin: miter"/> + + + + @@ -1846,15 +1899,15 @@ z - + +" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - + @@ -1863,15 +1916,15 @@ z - + +" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - + @@ -1885,15 +1938,15 @@ z - + +" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - + @@ -1903,15 +1956,15 @@ z - + +" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - + @@ -1930,12 +1983,12 @@ z - + - + - + ", "connectionstyle": "arc3,rad=0.3"}, + ) + ax.annotate( + "Second attempt,\nafter seeing\nunittest errors", + xy=(3.1, 68), + xytext=(4.25, 80), + horizontalalignment="center", + arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0.3"}, + ) + ax.set_ylabel("Percent of exercises with\nall unittests passing") # ax.set_xlabel("Model") - ax.set_title("Code editing success rate by model & edit format") + ax.set_title("Code Editing Success") ax.legend( title="Edit Format", loc="upper left", # bbox_to_anchor=(0.95, 0.95), ) ax.set_ylim(top=100) + plt.tight_layout() plt.savefig("tmp.svg") imgcat(fig)