This commit is contained in:
Paul Gauthier 2024-06-01 07:03:58 -07:00
parent 871bdc8c9a
commit 7623b8e2e6
4 changed files with 148 additions and 209 deletions

View file

@ -23,7 +23,7 @@ that was reported recently.
[![SWE Bench results](/assets/swe_bench.svg)](https://aider.chat/assets/swe_bench.svg) [![SWE Bench results](/assets/swe_bench.svg)](https://aider.chat/assets/swe_bench.svg)
Aider was benchmarked on 570 of the 2294 SWE Bench problems. Aider was benchmarked on 570 of the 2294 SWE Bench problems.
These are the same These were the same
[randomly selected 570 problems](https://github.com/CognitionAI/devin-swebench-results/tree/main/output_diffs) that [randomly selected 570 problems](https://github.com/CognitionAI/devin-swebench-results/tree/main/output_diffs) that
[Devin used in their evaluation](https://www.cognition.ai/post/swe-bench-technical-report). [Devin used in their evaluation](https://www.cognition.ai/post/swe-bench-technical-report).
Please see the [references](#references) Please see the [references](#references)
@ -251,7 +251,7 @@ In these cases aider with Opus was unable to produce any solutions.
## Computing the benchmark score ## Computing the benchmark score
Benchmarking produced one proposed solution for each of The benchmark harness produced one proposed solution for each of
the 570 SWE Bench problems. the 570 SWE Bench problems.
A separate evaluation script was used to A separate evaluation script was used to

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 49 KiB

Before After
Before After

View file

@ -6,7 +6,7 @@
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<cc:Work> <cc:Work>
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
<dc:date>2024-05-31T11:41:49.017547</dc:date> <dc:date>2024-06-01T07:02:59.687095</dc:date>
<dc:format>image/svg+xml</dc:format> <dc:format>image/svg+xml</dc:format>
<dc:creator> <dc:creator>
<cc:Agent> <cc:Agent>
@ -41,12 +41,12 @@ z
<g id="xtick_1"> <g id="xtick_1">
<g id="line2d_1"> <g id="line2d_1">
<defs> <defs>
<path id="m3c08837b00" d="M 0 0 <path id="m57dfe31113" d="M 0 0
L 0 3.5 L 0 3.5
" style="stroke: #000000; stroke-width: 0.8"/> " style="stroke: #000000; stroke-width: 0.8"/>
</defs> </defs>
<g> <g>
<use xlink:href="#m3c08837b00" x="137.644385" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#m57dfe31113" x="137.644385" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_1"> <g id="text_1">
@ -527,7 +527,7 @@ z
<g id="xtick_2"> <g id="xtick_2">
<g id="line2d_2"> <g id="line2d_2">
<g> <g>
<use xlink:href="#m3c08837b00" x="219.596257" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#m57dfe31113" x="219.596257" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_2"> <g id="text_2">
@ -707,7 +707,7 @@ z
<g id="xtick_3"> <g id="xtick_3">
<g id="line2d_3"> <g id="line2d_3">
<g> <g>
<use xlink:href="#m3c08837b00" x="301.548128" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#m57dfe31113" x="301.548128" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_3"> <g id="text_3">
@ -813,7 +813,7 @@ z
<g id="xtick_4"> <g id="xtick_4">
<g id="line2d_4"> <g id="line2d_4">
<g> <g>
<use xlink:href="#m3c08837b00" x="383.5" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#m57dfe31113" x="383.5" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_4"> <g id="text_4">
@ -1017,7 +1017,7 @@ z
<g id="xtick_5"> <g id="xtick_5">
<g id="line2d_5"> <g id="line2d_5">
<g> <g>
<use xlink:href="#m3c08837b00" x="465.451872" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#m57dfe31113" x="465.451872" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_5"> <g id="text_5">
@ -1120,7 +1120,7 @@ z
<g id="xtick_6"> <g id="xtick_6">
<g id="line2d_6"> <g id="line2d_6">
<g> <g>
<use xlink:href="#m3c08837b00" x="547.403743" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#m57dfe31113" x="547.403743" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_6"> <g id="text_6">
@ -1154,7 +1154,7 @@ z
<g id="xtick_7"> <g id="xtick_7">
<g id="line2d_7"> <g id="line2d_7">
<g> <g>
<use xlink:href="#m3c08837b00" x="629.355615" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#m57dfe31113" x="629.355615" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_7"> <g id="text_7">
@ -1247,16 +1247,16 @@ z
<g id="line2d_8"> <g id="line2d_8">
<path d="M 77 307.03625 <path d="M 77 307.03625
L 690 307.03625 L 690 307.03625
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_9"> <g id="line2d_9">
<defs> <defs>
<path id="m167bb8a136" d="M 0 0 <path id="mcf4a99e3f2" d="M 0 0
L -3.5 0 L -3.5 0
" style="stroke: #000000; stroke-width: 0.8"/> " style="stroke: #000000; stroke-width: 0.8"/>
</defs> </defs>
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="307.03625" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_8"> <g id="text_8">
@ -1281,11 +1281,11 @@ z
<g id="line2d_10"> <g id="line2d_10">
<path d="M 77 274.534192 <path d="M 77 274.534192
L 690 274.534192 L 690 274.534192
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_11"> <g id="line2d_11">
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="274.534192" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="274.534192" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_9"> <g id="text_9">
@ -1301,11 +1301,11 @@ L 690 274.534192
<g id="line2d_12"> <g id="line2d_12">
<path d="M 77 242.032134 <path d="M 77 242.032134
L 690 242.032134 L 690 242.032134
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_13"> <g id="line2d_13">
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="242.032134" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="242.032134" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_10"> <g id="text_10">
@ -1321,11 +1321,11 @@ L 690 242.032134
<g id="line2d_14"> <g id="line2d_14">
<path d="M 77 209.530076 <path d="M 77 209.530076
L 690 209.530076 L 690 209.530076
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_15"> <g id="line2d_15">
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="209.530076" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="209.530076" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_11"> <g id="text_11">
@ -1341,11 +1341,11 @@ L 690 209.530076
<g id="line2d_16"> <g id="line2d_16">
<path d="M 77 177.028018 <path d="M 77 177.028018
L 690 177.028018 L 690 177.028018
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_17"> <g id="line2d_17">
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="177.028018" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="177.028018" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_12"> <g id="text_12">
@ -1375,11 +1375,11 @@ z
<g id="line2d_18"> <g id="line2d_18">
<path d="M 77 144.52596 <path d="M 77 144.52596
L 690 144.52596 L 690 144.52596
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_19"> <g id="line2d_19">
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="144.52596" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="144.52596" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_13"> <g id="text_13">
@ -1396,11 +1396,11 @@ L 690 144.52596
<g id="line2d_20"> <g id="line2d_20">
<path d="M 77 112.023902 <path d="M 77 112.023902
L 690 112.023902 L 690 112.023902
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_21"> <g id="line2d_21">
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="112.023902" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="112.023902" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_14"> <g id="text_14">
@ -1417,11 +1417,11 @@ L 690 112.023902
<g id="line2d_22"> <g id="line2d_22">
<path d="M 77 79.521844 <path d="M 77 79.521844
L 690 79.521844 L 690 79.521844
" clip-path="url(#p1ec2c53f8e)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#p22faac38c8)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_23"> <g id="line2d_23">
<g> <g>
<use xlink:href="#m167bb8a136" x="77" y="79.521844" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mcf4a99e3f2" x="77" y="79.521844" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_15"> <g id="text_15">
@ -1568,7 +1568,7 @@ L 170.425134 307.03625
L 170.425134 170.527606 L 170.425134 170.527606
L 104.863636 170.527606 L 104.863636 170.527606
z z
" clip-path="url(#p1ec2c53f8e)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#p22faac38c8)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_8"> <g id="patch_8">
<path d="M 186.815508 307.03625 <path d="M 186.815508 307.03625
@ -1576,7 +1576,7 @@ L 252.377005 307.03625
L 252.377005 169.227524 L 252.377005 169.227524
L 186.815508 169.227524 L 186.815508 169.227524
z z
" clip-path="url(#p1ec2c53f8e)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#p22faac38c8)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_9"> <g id="patch_9">
<path d="M 268.76738 307.03625 <path d="M 268.76738 307.03625
@ -1584,7 +1584,7 @@ L 334.328877 307.03625
L 334.328877 144.52596 L 334.328877 144.52596
L 268.76738 144.52596 L 268.76738 144.52596
z z
" clip-path="url(#p1ec2c53f8e)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#p22faac38c8)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_10"> <g id="patch_10">
<path d="M 350.719251 307.03625 <path d="M 350.719251 307.03625
@ -1592,7 +1592,7 @@ L 416.280749 307.03625
L 416.280749 127.62489 L 416.280749 127.62489
L 350.719251 127.62489 L 350.719251 127.62489
z z
" clip-path="url(#p1ec2c53f8e)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#p22faac38c8)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_11"> <g id="patch_11">
<path d="M 432.671123 307.03625 <path d="M 432.671123 307.03625
@ -1600,7 +1600,7 @@ L 498.23262 307.03625
L 498.23262 126.324807 L 498.23262 126.324807
L 432.671123 126.324807 L 432.671123 126.324807
z z
" clip-path="url(#p1ec2c53f8e)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#p22faac38c8)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_12"> <g id="patch_12">
<path d="M 514.622995 307.03625 <path d="M 514.622995 307.03625
@ -1608,7 +1608,7 @@ L 580.184492 307.03625
L 580.184492 86.022256 L 580.184492 86.022256
L 514.622995 86.022256 L 514.622995 86.022256
z z
" clip-path="url(#p1ec2c53f8e)" style="fill: #17965a; opacity: 0.9"/> " clip-path="url(#p22faac38c8)" style="fill: #17965a; opacity: 0.9"/>
</g> </g>
<g id="patch_13"> <g id="patch_13">
<path d="M 596.574866 307.03625 <path d="M 596.574866 307.03625
@ -1616,7 +1616,7 @@ L 662.136364 307.03625
L 662.136364 62.620774 L 662.136364 62.620774
L 596.574866 62.620774 L 596.574866 62.620774
z z
" clip-path="url(#p1ec2c53f8e)" style="fill: #17965a; opacity: 0.9"/> " clip-path="url(#p22faac38c8)" style="fill: #17965a; opacity: 0.9"/>
</g> </g>
<g id="text_17"> <g id="text_17">
<!-- 10.5% --> <!-- 10.5% -->
@ -2006,60 +2006,9 @@ z
</g> </g>
</g> </g>
<g id="text_25"> <g id="text_25">
<!-- Note: (570) and (2294) refer to the number of SWE Bench instances that were benchmarked. --> <!-- (570) and (2294) denote the number of SWE Bench instances benchmarked -->
<g style="fill: #555555" transform="translate(112.887188 410.4) scale(0.12 -0.12)"> <g style="fill: #555555" transform="translate(157.561875 410.4) scale(0.12 -0.12)">
<defs> <defs>
<path id="Helvetica-4e" d="M 488 4591
L 1222 4591
L 3541 872
L 3541 4591
L 4131 4591
L 4131 0
L 3434 0
L 1081 3716
L 1081 0
L 488 0
L 488 4591
z
M 2269 4591
L 2269 4591
z
" transform="scale(0.015625)"/>
<path id="Helvetica-3a" d="M 709 3303
L 1363 3303
L 1363 2622
L 709 2622
L 709 3303
z
M 709 681
L 1363 681
L 1363 0
L 709 0
L 709 681
z
" transform="scale(0.015625)"/>
<path id="Helvetica-66" d="M 553 3856
Q 566 4206 675 4369
Q 872 4656 1434 4656
Q 1488 4656 1544 4653
Q 1600 4650 1672 4644
L 1672 4131
Q 1584 4138 1545 4139
Q 1506 4141 1472 4141
Q 1216 4141 1166 4008
Q 1116 3875 1116 3331
L 1672 3331
L 1672 2888
L 1109 2888
L 1109 0
L 553 0
L 553 2888
L 88 2888
L 88 3331
L 553 3331
L 553 3856
z
" transform="scale(0.015625)"/>
<path id="Helvetica-62" d="M 369 4606 <path id="Helvetica-62" d="M 369 4606
L 916 4606 L 916 4606
L 916 2941 L 916 2941
@ -2086,20 +2035,26 @@ Q 884 1238 1003 944
Q 1225 391 1831 391 Q 1225 391 1831 391
z z
" transform="scale(0.015625)"/> " transform="scale(0.015625)"/>
<path id="Helvetica-77" d="M 672 3347 <path id="Helvetica-66" d="M 553 3856
L 1316 709 Q 566 4206 675 4369
L 1969 3347 Q 872 4656 1434 4656
L 2600 3347 Q 1488 4656 1544 4653
L 3256 725 Q 1600 4650 1672 4644
L 3941 3347 L 1672 4131
L 4503 3347 Q 1584 4138 1545 4139
L 3531 0 Q 1506 4141 1472 4141
L 2947 0 Q 1216 4141 1166 4008
L 2266 2591 Q 1116 3875 1116 3331
L 1606 0 L 1672 3331
L 1022 0 L 1672 2888
L 56 3347 L 1109 2888
L 672 3347 L 1109 0
L 553 0
L 553 2888
L 88 2888
L 88 3331
L 553 3331
L 553 3856
z z
" transform="scale(0.015625)"/> " transform="scale(0.015625)"/>
<path id="Helvetica-6b" d="M 400 4591 <path id="Helvetica-6b" d="M 400 4591
@ -2118,99 +2073,80 @@ L 400 4591
z z
" transform="scale(0.015625)"/> " transform="scale(0.015625)"/>
</defs> </defs>
<use xlink:href="#Helvetica-4e"/> <use xlink:href="#Helvetica-28"/>
<use xlink:href="#Helvetica-6f" x="72.216797"/> <use xlink:href="#Helvetica-35" x="33.300781"/>
<use xlink:href="#Helvetica-74" x="127.832031"/> <use xlink:href="#Helvetica-37" x="88.916016"/>
<use xlink:href="#Helvetica-65" x="155.615234"/> <use xlink:href="#Helvetica-30" x="144.53125"/>
<use xlink:href="#Helvetica-3a" x="211.230469"/> <use xlink:href="#Helvetica-29" x="200.146484"/>
<use xlink:href="#Helvetica-20" x="239.013672"/> <use xlink:href="#Helvetica-20" x="233.447266"/>
<use xlink:href="#Helvetica-28" x="266.796875"/> <use xlink:href="#Helvetica-61" x="261.230469"/>
<use xlink:href="#Helvetica-35" x="300.097656"/> <use xlink:href="#Helvetica-6e" x="316.845703"/>
<use xlink:href="#Helvetica-37" x="355.712891"/> <use xlink:href="#Helvetica-64" x="372.460938"/>
<use xlink:href="#Helvetica-30" x="411.328125"/> <use xlink:href="#Helvetica-20" x="428.076172"/>
<use xlink:href="#Helvetica-29" x="466.943359"/> <use xlink:href="#Helvetica-28" x="455.859375"/>
<use xlink:href="#Helvetica-20" x="500.244141"/> <use xlink:href="#Helvetica-32" x="489.160156"/>
<use xlink:href="#Helvetica-61" x="528.027344"/> <use xlink:href="#Helvetica-32" x="544.775391"/>
<use xlink:href="#Helvetica-6e" x="583.642578"/> <use xlink:href="#Helvetica-39" x="600.390625"/>
<use xlink:href="#Helvetica-64" x="639.257812"/> <use xlink:href="#Helvetica-34" x="656.005859"/>
<use xlink:href="#Helvetica-20" x="694.873047"/> <use xlink:href="#Helvetica-29" x="711.621094"/>
<use xlink:href="#Helvetica-28" x="722.65625"/> <use xlink:href="#Helvetica-20" x="744.921875"/>
<use xlink:href="#Helvetica-32" x="755.957031"/> <use xlink:href="#Helvetica-64" x="772.705078"/>
<use xlink:href="#Helvetica-32" x="811.572266"/> <use xlink:href="#Helvetica-65" x="828.320312"/>
<use xlink:href="#Helvetica-39" x="867.1875"/> <use xlink:href="#Helvetica-6e" x="883.935547"/>
<use xlink:href="#Helvetica-34" x="922.802734"/> <use xlink:href="#Helvetica-6f" x="939.550781"/>
<use xlink:href="#Helvetica-29" x="978.417969"/> <use xlink:href="#Helvetica-74" x="995.166016"/>
<use xlink:href="#Helvetica-20" x="1011.71875"/> <use xlink:href="#Helvetica-65" x="1022.949219"/>
<use xlink:href="#Helvetica-72" x="1039.501953"/> <use xlink:href="#Helvetica-20" x="1078.564453"/>
<use xlink:href="#Helvetica-65" x="1072.802734"/> <use xlink:href="#Helvetica-74" x="1106.347656"/>
<use xlink:href="#Helvetica-66" x="1128.417969"/> <use xlink:href="#Helvetica-68" x="1134.130859"/>
<use xlink:href="#Helvetica-65" x="1156.201172"/> <use xlink:href="#Helvetica-65" x="1189.746094"/>
<use xlink:href="#Helvetica-72" x="1211.816406"/> <use xlink:href="#Helvetica-20" x="1245.361328"/>
<use xlink:href="#Helvetica-20" x="1245.117188"/> <use xlink:href="#Helvetica-6e" x="1273.144531"/>
<use xlink:href="#Helvetica-74" x="1272.900391"/> <use xlink:href="#Helvetica-75" x="1328.759766"/>
<use xlink:href="#Helvetica-6f" x="1300.683594"/> <use xlink:href="#Helvetica-6d" x="1384.375"/>
<use xlink:href="#Helvetica-20" x="1356.298828"/> <use xlink:href="#Helvetica-62" x="1467.675781"/>
<use xlink:href="#Helvetica-74" x="1384.082031"/> <use xlink:href="#Helvetica-65" x="1523.291016"/>
<use xlink:href="#Helvetica-68" x="1411.865234"/> <use xlink:href="#Helvetica-72" x="1578.90625"/>
<use xlink:href="#Helvetica-65" x="1467.480469"/> <use xlink:href="#Helvetica-20" x="1612.207031"/>
<use xlink:href="#Helvetica-20" x="1523.095703"/> <use xlink:href="#Helvetica-6f" x="1639.990234"/>
<use xlink:href="#Helvetica-6e" x="1550.878906"/> <use xlink:href="#Helvetica-66" x="1695.605469"/>
<use xlink:href="#Helvetica-75" x="1606.494141"/> <use xlink:href="#Helvetica-20" x="1723.388672"/>
<use xlink:href="#Helvetica-6d" x="1662.109375"/> <use xlink:href="#Helvetica-53" x="1751.171875"/>
<use xlink:href="#Helvetica-62" x="1745.410156"/> <use xlink:href="#Helvetica-57" x="1817.871094"/>
<use xlink:href="#Helvetica-65" x="1801.025391"/> <use xlink:href="#Helvetica-45" x="1912.255859"/>
<use xlink:href="#Helvetica-72" x="1856.640625"/> <use xlink:href="#Helvetica-20" x="1978.955078"/>
<use xlink:href="#Helvetica-20" x="1889.941406"/> <use xlink:href="#Helvetica-42" x="2006.738281"/>
<use xlink:href="#Helvetica-6f" x="1917.724609"/> <use xlink:href="#Helvetica-65" x="2073.4375"/>
<use xlink:href="#Helvetica-66" x="1973.339844"/> <use xlink:href="#Helvetica-6e" x="2129.052734"/>
<use xlink:href="#Helvetica-20" x="2001.123047"/> <use xlink:href="#Helvetica-63" x="2184.667969"/>
<use xlink:href="#Helvetica-53" x="2028.90625"/> <use xlink:href="#Helvetica-68" x="2234.667969"/>
<use xlink:href="#Helvetica-57" x="2095.605469"/> <use xlink:href="#Helvetica-20" x="2290.283203"/>
<use xlink:href="#Helvetica-45" x="2189.990234"/> <use xlink:href="#Helvetica-69" x="2318.066406"/>
<use xlink:href="#Helvetica-20" x="2256.689453"/> <use xlink:href="#Helvetica-6e" x="2340.283203"/>
<use xlink:href="#Helvetica-42" x="2284.472656"/> <use xlink:href="#Helvetica-73" x="2395.898438"/>
<use xlink:href="#Helvetica-65" x="2351.171875"/> <use xlink:href="#Helvetica-74" x="2445.898438"/>
<use xlink:href="#Helvetica-6e" x="2406.787109"/> <use xlink:href="#Helvetica-61" x="2473.681641"/>
<use xlink:href="#Helvetica-63" x="2462.402344"/> <use xlink:href="#Helvetica-6e" x="2529.296875"/>
<use xlink:href="#Helvetica-68" x="2512.402344"/> <use xlink:href="#Helvetica-63" x="2584.912109"/>
<use xlink:href="#Helvetica-20" x="2568.017578"/> <use xlink:href="#Helvetica-65" x="2634.912109"/>
<use xlink:href="#Helvetica-69" x="2595.800781"/> <use xlink:href="#Helvetica-73" x="2690.527344"/>
<use xlink:href="#Helvetica-6e" x="2618.017578"/> <use xlink:href="#Helvetica-20" x="2740.527344"/>
<use xlink:href="#Helvetica-73" x="2673.632812"/> <use xlink:href="#Helvetica-62" x="2768.310547"/>
<use xlink:href="#Helvetica-74" x="2723.632812"/> <use xlink:href="#Helvetica-65" x="2823.925781"/>
<use xlink:href="#Helvetica-61" x="2751.416016"/> <use xlink:href="#Helvetica-6e" x="2879.541016"/>
<use xlink:href="#Helvetica-6e" x="2807.03125"/> <use xlink:href="#Helvetica-63" x="2935.15625"/>
<use xlink:href="#Helvetica-63" x="2862.646484"/> <use xlink:href="#Helvetica-68" x="2985.15625"/>
<use xlink:href="#Helvetica-65" x="2912.646484"/> <use xlink:href="#Helvetica-6d" x="3040.771484"/>
<use xlink:href="#Helvetica-73" x="2968.261719"/> <use xlink:href="#Helvetica-61" x="3124.072266"/>
<use xlink:href="#Helvetica-20" x="3018.261719"/> <use xlink:href="#Helvetica-72" x="3179.6875"/>
<use xlink:href="#Helvetica-74" x="3046.044922"/> <use xlink:href="#Helvetica-6b" x="3212.988281"/>
<use xlink:href="#Helvetica-68" x="3073.828125"/> <use xlink:href="#Helvetica-65" x="3262.988281"/>
<use xlink:href="#Helvetica-61" x="3129.443359"/> <use xlink:href="#Helvetica-64" x="3318.603516"/>
<use xlink:href="#Helvetica-74" x="3185.058594"/>
<use xlink:href="#Helvetica-20" x="3212.841797"/>
<use xlink:href="#Helvetica-77" x="3240.625"/>
<use xlink:href="#Helvetica-65" x="3312.841797"/>
<use xlink:href="#Helvetica-72" x="3368.457031"/>
<use xlink:href="#Helvetica-65" x="3401.757812"/>
<use xlink:href="#Helvetica-20" x="3457.373047"/>
<use xlink:href="#Helvetica-62" x="3485.15625"/>
<use xlink:href="#Helvetica-65" x="3540.771484"/>
<use xlink:href="#Helvetica-6e" x="3596.386719"/>
<use xlink:href="#Helvetica-63" x="3652.001953"/>
<use xlink:href="#Helvetica-68" x="3702.001953"/>
<use xlink:href="#Helvetica-6d" x="3757.617188"/>
<use xlink:href="#Helvetica-61" x="3840.917969"/>
<use xlink:href="#Helvetica-72" x="3896.533203"/>
<use xlink:href="#Helvetica-6b" x="3929.833984"/>
<use xlink:href="#Helvetica-65" x="3979.833984"/>
<use xlink:href="#Helvetica-64" x="4035.449219"/>
<use xlink:href="#Helvetica-2e" x="4091.064453"/>
</g> </g>
</g> </g>
</g> </g>
<defs> <defs>
<clipPath id="p1ec2c53f8e"> <clipPath id="p22faac38c8">
<rect x="77" y="50.4" width="613" height="256.63625"/> <rect x="77" y="50.4" width="613" height="256.63625"/>
</clipPath> </clipPath>
</defs> </defs>

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 55 KiB

Before After
Before After

View file

@ -6,7 +6,7 @@ from imgcat import imgcat
from matplotlib import rc from matplotlib import rc
def plot_swe_bench_lite(data_file): def plot_swe_bench(data_file, is_lite):
with open(data_file, "r") as file: with open(data_file, "r") as file:
lines = file.readlines() lines = file.readlines()
@ -45,7 +45,7 @@ def plot_swe_bench_lite(data_file):
for model, pass_rate, color in zip(models, pass_rates, colors): for model, pass_rate, color in zip(models, pass_rates, colors):
alpha = 0.9 if "Aider" in model else 0.3 alpha = 0.9 if "Aider" in model else 0.3
hatch = "" hatch = ""
# if "lite" not in data_file: # if is_lite:
# hatch = "///" if "(570)" in model else "" # hatch = "///" if "(570)" in model else ""
bar = ax.bar(model, pass_rate, color=color, alpha=alpha, zorder=3, hatch=hatch) bar = ax.bar(model, pass_rate, color=color, alpha=alpha, zorder=3, hatch=hatch)
bars.append(bar[0]) bars.append(bar[0])
@ -69,7 +69,7 @@ def plot_swe_bench_lite(data_file):
# ax.set_xlabel("Models", fontsize=18) # ax.set_xlabel("Models", fontsize=18)
ax.set_ylabel("Instances resolved (%)", fontsize=18, color=font_color) ax.set_ylabel("Instances resolved (%)", fontsize=18, color=font_color)
if "lite" in data_file: if is_lite:
title = "SWE Bench Lite" title = "SWE Bench Lite"
else: else:
title = "SWE Bench" title = "SWE Bench"
@ -80,21 +80,22 @@ def plot_swe_bench_lite(data_file):
color=font_color, color=font_color,
) )
# Add note at the bottom of the graph if is_lite:
note = ( plt.tight_layout(pad=3.0)
"Note: (570) and (2294) refer to the number of SWE Bench instances that were benchmarked." else:
) # Add note at the bottom of the graph
plt.figtext( note = "(570) and (2294) denote the number of SWE Bench instances benchmarked"
0.5, plt.figtext(
0.05, 0.5,
note, 0.05,
wrap=True, note,
horizontalalignment="center", wrap=True,
fontsize=12, horizontalalignment="center",
color=font_color, fontsize=12,
) color=font_color,
)
plt.tight_layout(pad=3.0, rect=[0, 0.05, 1, 1]) plt.tight_layout(pad=3.0, rect=[0, 0.05, 1, 1])
out_fname = Path(data_file.replace("-", "_")) out_fname = Path(data_file.replace("-", "_"))
plt.savefig(out_fname.with_suffix(".jpg").name) plt.savefig(out_fname.with_suffix(".jpg").name)
@ -104,4 +105,6 @@ def plot_swe_bench_lite(data_file):
fname = sys.argv[1] fname = sys.argv[1]
plot_swe_bench_lite(fname) is_lite = "lite" in fname
plot_swe_bench(fname, is_lite)