This commit is contained in:
Paul Gauthier 2024-06-01 14:48:12 -07:00
parent 70411431ab
commit 2febc663f3
4 changed files with 36 additions and 36 deletions

View file

@ -239,7 +239,7 @@ We can see that Opus overrides
them with plausible-but-incorrect them with plausible-but-incorrect
solutions resulting in 0 resolved problems from that row. solutions resulting in 0 resolved problems from that row.
Rows G-K we cover the cases where neither model Rows G-K cover the cases where neither model
produced plausible solutions. produced plausible solutions.
Which solution was ultimately selected for each problem depends on Which solution was ultimately selected for each problem depends on
[details about which solution the harness considered "most plausible"](https://aider.chat/2024/05/22/swe-bench-lite.html#finding-a-plausible-solution). [details about which solution the harness considered "most plausible"](https://aider.chat/2024/05/22/swe-bench-lite.html#finding-a-plausible-solution).

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

After

Width:  |  Height:  |  Size: 43 KiB

Before After
Before After

View file

@ -6,7 +6,7 @@
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<cc:Work> <cc:Work>
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
<dc:date>2024-06-01T11:46:22.003048</dc:date> <dc:date>2024-06-01T14:47:44.878771</dc:date>
<dc:format>image/svg+xml</dc:format> <dc:format>image/svg+xml</dc:format>
<dc:creator> <dc:creator>
<cc:Agent> <cc:Agent>
@ -41,12 +41,12 @@ z
<g id="xtick_1"> <g id="xtick_1">
<g id="line2d_1"> <g id="line2d_1">
<defs> <defs>
<path id="m2f6ba3216e" d="M 0 0 <path id="mfea16f89a5" d="M 0 0
L 0 3.5 L 0 3.5
" style="stroke: #000000; stroke-width: 0.8"/> " style="stroke: #000000; stroke-width: 0.8"/>
</defs> </defs>
<g> <g>
<use xlink:href="#m2f6ba3216e" x="137.644385" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mfea16f89a5" x="137.644385" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_1"> <g id="text_1">
@ -412,7 +412,7 @@ z
<g id="xtick_2"> <g id="xtick_2">
<g id="line2d_2"> <g id="line2d_2">
<g> <g>
<use xlink:href="#m2f6ba3216e" x="219.596257" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mfea16f89a5" x="219.596257" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_2"> <g id="text_2">
@ -583,7 +583,7 @@ z
<g id="xtick_3"> <g id="xtick_3">
<g id="line2d_3"> <g id="line2d_3">
<g> <g>
<use xlink:href="#m2f6ba3216e" x="301.548128" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mfea16f89a5" x="301.548128" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_3"> <g id="text_3">
@ -699,7 +699,7 @@ z
<g id="xtick_4"> <g id="xtick_4">
<g id="line2d_4"> <g id="line2d_4">
<g> <g>
<use xlink:href="#m2f6ba3216e" x="383.5" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mfea16f89a5" x="383.5" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_4"> <g id="text_4">
@ -894,7 +894,7 @@ z
<g id="xtick_5"> <g id="xtick_5">
<g id="line2d_5"> <g id="line2d_5">
<g> <g>
<use xlink:href="#m2f6ba3216e" x="465.451872" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mfea16f89a5" x="465.451872" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_5"> <g id="text_5">
@ -926,7 +926,7 @@ z
<g id="xtick_6"> <g id="xtick_6">
<g id="line2d_6"> <g id="line2d_6">
<g> <g>
<use xlink:href="#m2f6ba3216e" x="547.403743" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mfea16f89a5" x="547.403743" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_6"> <g id="text_6">
@ -1157,7 +1157,7 @@ z
<g id="xtick_7"> <g id="xtick_7">
<g id="line2d_7"> <g id="line2d_7">
<g> <g>
<use xlink:href="#m2f6ba3216e" x="629.355615" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#mfea16f89a5" x="629.355615" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_7"> <g id="text_7">
@ -1339,16 +1339,16 @@ z
<g id="line2d_8"> <g id="line2d_8">
<path d="M 77 307.664 <path d="M 77 307.664
L 690 307.664 L 690 307.664
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_9"> <g id="line2d_9">
<defs> <defs>
<path id="mca54b52fb4" d="M 0 0 <path id="ma576030dcc" d="M 0 0
L -3.5 0 L -3.5 0
" style="stroke: #000000; stroke-width: 0.8"/> " style="stroke: #000000; stroke-width: 0.8"/>
</defs> </defs>
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="307.664" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="307.664" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_8"> <g id="text_8">
@ -1394,11 +1394,11 @@ z
<g id="line2d_10"> <g id="line2d_10">
<path d="M 77 275.08244 <path d="M 77 275.08244
L 690 275.08244 L 690 275.08244
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_11"> <g id="line2d_11">
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="275.08244" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="275.08244" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_9"> <g id="text_9">
@ -1467,11 +1467,11 @@ z
<g id="line2d_12"> <g id="line2d_12">
<path d="M 77 242.500879 <path d="M 77 242.500879
L 690 242.500879 L 690 242.500879
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_13"> <g id="line2d_13">
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="242.500879" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="242.500879" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_10"> <g id="text_10">
@ -1487,11 +1487,11 @@ L 690 242.500879
<g id="line2d_14"> <g id="line2d_14">
<path d="M 77 209.919319 <path d="M 77 209.919319
L 690 209.919319 L 690 209.919319
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_15"> <g id="line2d_15">
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="209.919319" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="209.919319" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_11"> <g id="text_11">
@ -1523,11 +1523,11 @@ z
<g id="line2d_16"> <g id="line2d_16">
<path d="M 77 177.337759 <path d="M 77 177.337759
L 690 177.337759 L 690 177.337759
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_17"> <g id="line2d_17">
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="177.337759" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="177.337759" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_12"> <g id="text_12">
@ -1557,11 +1557,11 @@ z
<g id="line2d_18"> <g id="line2d_18">
<path d="M 77 144.756199 <path d="M 77 144.756199
L 690 144.756199 L 690 144.756199
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_19"> <g id="line2d_19">
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="144.756199" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="144.756199" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_13"> <g id="text_13">
@ -1578,11 +1578,11 @@ L 690 144.756199
<g id="line2d_20"> <g id="line2d_20">
<path d="M 77 112.174638 <path d="M 77 112.174638
L 690 112.174638 L 690 112.174638
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_21"> <g id="line2d_21">
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="112.174638" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="112.174638" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_14"> <g id="text_14">
@ -1599,11 +1599,11 @@ L 690 112.174638
<g id="line2d_22"> <g id="line2d_22">
<path d="M 77 79.593078 <path d="M 77 79.593078
L 690 79.593078 L 690 79.593078
" clip-path="url(#p4aa384bc7b)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/> " clip-path="url(#pfeb5048445)" style="fill: none; stroke: #b0b0b0; stroke-width: 0.2; stroke-linecap: square"/>
</g> </g>
<g id="line2d_23"> <g id="line2d_23">
<g> <g>
<use xlink:href="#mca54b52fb4" x="77" y="79.593078" style="stroke: #000000; stroke-width: 0.8"/> <use xlink:href="#ma576030dcc" x="77" y="79.593078" style="stroke: #000000; stroke-width: 0.8"/>
</g> </g>
</g> </g>
<g id="text_15"> <g id="text_15">
@ -1780,7 +1780,7 @@ L 170.425134 307.664
L 170.425134 170.821447 L 170.425134 170.821447
L 104.863636 170.821447 L 104.863636 170.821447
z z
" clip-path="url(#p4aa384bc7b)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#pfeb5048445)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_8"> <g id="patch_8">
<path d="M 186.815508 307.664 <path d="M 186.815508 307.664
@ -1788,7 +1788,7 @@ L 252.377005 307.664
L 252.377005 169.518184 L 252.377005 169.518184
L 186.815508 169.518184 L 186.815508 169.518184
z z
" clip-path="url(#p4aa384bc7b)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#pfeb5048445)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_9"> <g id="patch_9">
<path d="M 268.76738 307.664 <path d="M 268.76738 307.664
@ -1796,7 +1796,7 @@ L 334.328877 307.664
L 334.328877 144.756199 L 334.328877 144.756199
L 268.76738 144.756199 L 268.76738 144.756199
z z
" clip-path="url(#p4aa384bc7b)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#pfeb5048445)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_10"> <g id="patch_10">
<path d="M 350.719251 307.664 <path d="M 350.719251 307.664
@ -1804,7 +1804,7 @@ L 416.280749 307.664
L 416.280749 127.813787 L 416.280749 127.813787
L 350.719251 127.813787 L 350.719251 127.813787
z z
" clip-path="url(#p4aa384bc7b)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#pfeb5048445)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_11"> <g id="patch_11">
<path d="M 432.671123 307.664 <path d="M 432.671123 307.664
@ -1812,7 +1812,7 @@ L 498.23262 307.664
L 498.23262 126.510525 L 498.23262 126.510525
L 432.671123 126.510525 L 432.671123 126.510525
z z
" clip-path="url(#p4aa384bc7b)" style="fill: #b3d1e6; opacity: 0.3"/> " clip-path="url(#pfeb5048445)" style="fill: #b3d1e6; opacity: 0.3"/>
</g> </g>
<g id="patch_12"> <g id="patch_12">
<path d="M 514.622995 307.664 <path d="M 514.622995 307.664
@ -1820,7 +1820,7 @@ L 580.184492 307.664
L 580.184492 86.10939 L 580.184492 86.10939
L 514.622995 86.10939 L 514.622995 86.10939
z z
" clip-path="url(#p4aa384bc7b)" style="fill: #155f91; opacity: 0.9"/> " clip-path="url(#pfeb5048445)" style="fill: #1a75c2; opacity: 0.9"/>
</g> </g>
<g id="patch_13"> <g id="patch_13">
<path d="M 596.574866 307.664 <path d="M 596.574866 307.664
@ -1828,7 +1828,7 @@ L 662.136364 307.664
L 662.136364 62.650667 L 662.136364 62.650667
L 596.574866 62.650667 L 596.574866 62.650667
z z
" clip-path="url(#p4aa384bc7b)" style="fill: #155f91; opacity: 0.9"/> " clip-path="url(#pfeb5048445)" style="fill: #1a75c2; opacity: 0.9"/>
</g> </g>
<g id="text_17"> <g id="text_17">
<!-- 10.5% --> <!-- 10.5% -->
@ -2356,7 +2356,7 @@ z
</g> </g>
</g> </g>
<defs> <defs>
<clipPath id="p4aa384bc7b"> <clipPath id="pfeb5048445">
<rect x="77" y="50.4" width="613" height="257.264"/> <rect x="77" y="50.4" width="613" height="257.264"/>
</clipPath> </clipPath>
</defs> </defs>

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 57 KiB

Before After
Before After

View file

@ -54,7 +54,7 @@ def plot_swe_bench(data_file, is_lite):
if is_lite: if is_lite:
colors = ["#17965A" if "Aider" in model else "#b3d1e6" for model in models] colors = ["#17965A" if "Aider" in model else "#b3d1e6" for model in models]
else: else:
colors = ["#155F91" if "Aider" in model else "#b3d1e6" for model in models] colors = ["#1A75C2" if "Aider" in model else "#b3d1e6" for model in models]
bars = [] bars = []
for model, pass_rate, color in zip(models, pass_rates, colors): for model, pass_rate, color in zip(models, pass_rates, colors):