From 8ef166478a3c9af10160581261f2b4e2dfbeac16 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sat, 1 Jul 2023 17:18:14 -0700 Subject: [PATCH] graph layout --- assets/benchmarks.svg | 625 +++++++++++++++++++++-------------------- benchmark/benchmark.py | 15 +- docs/benchmarks.md | 4 +- 3 files changed, 331 insertions(+), 313 deletions(-) diff --git a/assets/benchmarks.svg b/assets/benchmarks.svg index e578b8722..2dfedbda3 100644 --- a/assets/benchmarks.svg +++ b/assets/benchmarks.svg @@ -1,12 +1,12 @@ - + - 2023-07-01T16:38:56.085115 + 2023-07-01T17:07:13.416292 image/svg+xml @@ -21,8 +21,8 @@ - - - - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -347,23 +382,9 @@ z - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + @@ -409,23 +447,9 @@ z - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - + - + - + - + @@ -537,41 +548,41 @@ z - + - - + - + - + - + - + - + - + - + @@ -625,18 +636,18 @@ L 840.24 329.012528 - + - + - + @@ -644,18 +655,18 @@ L 840.24 235.68731 - + - + - + +" clip-path="url(#p23c69ca731)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -724,7 +735,7 @@ L 840.24 49.036875 - + - + - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - - - @@ -1224,7 +1235,7 @@ L 840.24 49.036875 - + - + @@ -1289,7 +1300,7 @@ z - + @@ -1297,7 +1308,7 @@ z - + @@ -1305,7 +1316,7 @@ z - + @@ -1313,7 +1324,7 @@ z - + @@ -1321,7 +1332,7 @@ z - + @@ -1329,7 +1340,7 @@ z - + - + @@ -1377,7 +1388,7 @@ z - + @@ -1385,7 +1396,7 @@ z - + @@ -1393,7 +1404,7 @@ z - + @@ -1401,7 +1412,7 @@ z - + @@ -1409,7 +1420,7 @@ z - + @@ -1417,7 +1428,7 @@ z - + @@ -1425,172 +1436,172 @@ z - + - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3e6a8; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: #b3d1e6; stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - +" clip-path="url(#p23c69ca731)" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> - - - + - + - - - + - + @@ -1782,7 +1793,7 @@ z - + @@ -1922,7 +1933,7 @@ L 179.488 147.352813 L 179.488 131.952813 L 135.488 131.952813 z -" style="fill: url(#h096bbb71c9); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" style="fill: url(#h83cb846590); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> @@ -1962,7 +1973,7 @@ L 179.488 210.087188 L 179.488 194.687188 L 135.488 194.687188 z -" style="fill: url(#h310909dbbf); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> +" style="fill: url(#hd5951d1aae); stroke: #eeeeee; stroke-width: 2; stroke-linejoin: miter"/> @@ -1983,12 +1994,12 @@ z - - + + - + - + ", "connectionstyle": "arc3,rad=0.3"}, ) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index ee7212196..b90e7bf6d 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -235,8 +235,10 @@ The benchmark results have me fairly convinced that the new `gpt-3.5-turbo-0613` and `gpt-3.5-16k-0613` models are a bit worse at code editing than the older `gpt-3.5-turbo-0301` model. -This is especially visible if you look at just the "first coding attempt" +This is especially visible in the "first coding attempt" portion of each result, before GPT gets a second chance to edit the code. +Look at the horizontal white line in the middle of the first three blue bars. + Performance with the `whole` edit format was 46% for the February model and only 39% for the June models. I saw other signs of this degraded performance