diff --git a/assets/swe_bench.jpg b/assets/swe_bench.jpg index 9b5029fa8..bcfe7f561 100644 Binary files a/assets/swe_bench.jpg and b/assets/swe_bench.jpg differ diff --git a/assets/swe_bench.svg b/assets/swe_bench.svg index 149381f98..e27939dec 100644 --- a/assets/swe_bench.svg +++ b/assets/swe_bench.svg @@ -6,7 +6,7 @@ - 2024-06-01T16:00:26.751322 + 2024-06-02T09:28:07.920943 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -412,7 +412,7 @@ z - + @@ -583,7 +583,7 @@ z - + @@ -699,7 +699,7 @@ z - + @@ -894,7 +894,7 @@ z - + @@ -926,7 +926,7 @@ z - + @@ -1157,7 +1157,7 @@ z - + @@ -1339,16 +1339,16 @@ z +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + @@ -1394,11 +1394,11 @@ z +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1467,11 +1467,11 @@ z +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1487,11 +1487,11 @@ L 690 242.845658 +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1523,16 +1523,16 @@ z +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + - + +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + - + @@ -1578,16 +1578,16 @@ L 690 145.618145 +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + - + @@ -1599,16 +1599,16 @@ L 690 113.208974 +" clip-path="url(#p29664ad139)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + - + @@ -1617,38 +1617,60 @@ L 690 80.799802 - - + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + @@ -1780,7 +1790,7 @@ L 170.425134 307.664 L 170.425134 171.545481 L 104.863636 171.545481 z -" clip-path="url(#pb8819c8324)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p29664ad139)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p29664ad139)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p29664ad139)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p29664ad139)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p29664ad139)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p29664ad139)" style="fill: #1a75c2; opacity: 0.9"/> +" clip-path="url(#p29664ad139)" style="fill: #1a75c2; opacity: 0.9"/> @@ -2290,7 +2300,7 @@ z - + @@ -2369,6 +2379,30 @@ Q 3319 0 2413 0 L 472 0 L 472 4591 z +" transform="scale(0.015625)"/> + + diff --git a/assets/swe_bench_lite.jpg b/assets/swe_bench_lite.jpg index 23032d3b8..5d1e047f0 100644 Binary files a/assets/swe_bench_lite.jpg and b/assets/swe_bench_lite.jpg differ diff --git a/assets/swe_bench_lite.svg b/assets/swe_bench_lite.svg index 4317ae672..9a384063d 100644 --- a/assets/swe_bench_lite.svg +++ b/assets/swe_bench_lite.svg @@ -6,7 +6,7 @@ - 2024-06-01T07:33:14.155906 + 2024-06-02T09:28:31.406793 image/svg+xml @@ -41,12 +41,12 @@ z - - + @@ -412,7 +412,7 @@ z - + @@ -482,7 +482,7 @@ z - + @@ -598,7 +598,7 @@ z - + @@ -754,7 +754,7 @@ z - + @@ -928,7 +928,7 @@ z - + @@ -1159,7 +1159,7 @@ z - + @@ -1341,21 +1341,21 @@ z +" clip-path="url(#p0f09835c00)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - - + - + +" clip-path="url(#p0f09835c00)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + - + +" clip-path="url(#p0f09835c00)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1465,11 +1465,11 @@ z +" clip-path="url(#p0f09835c00)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1484,11 +1484,11 @@ L 690 167.922555 +" clip-path="url(#p0f09835c00)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1530,11 +1530,11 @@ z +" clip-path="url(#p0f09835c00)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> - + @@ -1546,38 +1546,60 @@ L 690 74.761592 - - + + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + @@ -1709,7 +1719,7 @@ L 163.368917 307.664 L 163.368917 198.665673 L 96.917045 198.665673 z -" clip-path="url(#p64bcd2b177)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p0f09835c00)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p0f09835c00)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p0f09835c00)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p0f09835c00)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p0f09835c00)" style="fill: #b3d1e6; opacity: 0.3"/> +" clip-path="url(#p0f09835c00)" style="fill: #17965a; opacity: 0.9"/> +" clip-path="url(#p0f09835c00)" style="fill: #17965a; opacity: 0.9"/> @@ -2216,6 +2226,30 @@ Q 3319 0 2413 0 L 472 0 L 472 4591 z +" transform="scale(0.015625)"/> + + diff --git a/benchmark/swe_bench.py b/benchmark/swe_bench.py index 7e2ac9d81..56021e9ce 100644 --- a/benchmark/swe_bench.py +++ b/benchmark/swe_bench.py @@ -104,7 +104,7 @@ def plot_swe_bench(data_file, is_lite): ) # ax.set_xlabel("Models", fontsize=18) - ax.set_ylabel("Instances resolved (%)", fontsize=18, color=font_color) + ax.set_ylabel("Pass@1 (%)", fontsize=18, color=font_color) if is_lite: title = "SWE Bench Lite" else: