diff --git a/assets/benchmarks-1106.svg b/assets/benchmarks-1106.svg index dc3519cdb..8c20864a1 100644 --- a/assets/benchmarks-1106.svg +++ b/assets/benchmarks-1106.svg @@ -6,7 +6,7 @@ - 2023-11-07T06:36:57.392139 + 2023-11-07T07:21:57.718957 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -363,7 +363,7 @@ z - + @@ -428,7 +428,7 @@ z - + @@ -461,7 +461,7 @@ z - + @@ -507,7 +507,7 @@ z - + @@ -532,7 +532,7 @@ z - + @@ -766,16 +766,16 @@ z +" clip-path="url(#pccd18e2cbe)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + @@ -789,11 +789,11 @@ L -3.5 0 +" clip-path="url(#pccd18e2cbe)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -835,11 +835,11 @@ z +" clip-path="url(#pccd18e2cbe)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -854,11 +854,11 @@ L 404.17201 153.82125 +" clip-path="url(#pccd18e2cbe)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -873,11 +873,11 @@ L 404.17201 111.111875 +" clip-path="url(#pccd18e2cbe)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -933,11 +933,11 @@ z +" clip-path="url(#pccd18e2cbe)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1199,17 +1199,17 @@ z +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> - + @@ -1394,17 +1394,17 @@ z +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3e6a8; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#pccd18e2cbe)" style="fill: #b3d1e6; stroke: #ffffff; stroke-width: 1.5; stroke-linejoin: miter"/> - - @@ -1635,8 +1635,8 @@ z - - + + @@ -1651,26 +1651,26 @@ z + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + @@ -1868,7 +1868,7 @@ z - + diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 478b1034a..0b04eb014 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -189,14 +189,14 @@ def show_stats(dirnames): top = 95 ax.annotate( "First attempt,\nbased on\nnatural language\ninstructions", - xy=(2.25, 40), + xy=(2.20, 41), xytext=(2, top), horizontalalignment="center", verticalalignment="top", arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0.3"}, ) ax.annotate( - "Second attempt,\nincluding unit\ntest error output", + "Second attempt,\nincluding unit test\nerror output", xy=(2.55, 56), xytext=(3.5, top), horizontalalignment="center", diff --git a/docs/benchmarks-1106.md b/docs/benchmarks-1106.md index 59f343b3d..366b4c973 100644 --- a/docs/benchmarks-1106.md +++ b/docs/benchmarks-1106.md @@ -48,7 +48,7 @@ The benchmark gives aider two tries to complete the task: OpenAI is enforcing very low rate limits on the new GPT-4 model. The limits are so low, that I have only been able to attempt -83 +85 out of 133 exercism problems. The problems are selected in random order, so results should be *roughly* indicative of the full benchmark.