feat: Add line graph to visualize Sonnet performance over time

This commit is contained in:
Paul Gauthier (aider) 2024-08-26 20:25:34 -07:00
parent 95c14e945e
commit 8816d22591

View file

@ -17,4 +17,67 @@ There has been a lot of speculation that Sonnet has been
dumbed-down, nerfed or otherwise performing worse lately.
Sonnet seems as good as ever.
Here's a graph showing the performance of Claude 3.5 Sonnet over time:
<div class="chart-container" style="position: relative; height:400px; width:100%">
<canvas id="sonnetPerformanceChart"></canvas>
</div>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
var ctx = document.getElementById('sonnetPerformanceChart').getContext('2d');
var sonnetData = {{ site.data.sonnet-fine | jsonify }};
var dates = sonnetData.map(item => item.date);
var passRate1 = sonnetData.map(item => item.pass_rate_1);
var passRate2 = sonnetData.map(item => item.pass_rate_2);
new Chart(ctx, {
type: 'line',
data: {
labels: dates,
datasets: [{
label: 'Pass Rate 1',
data: passRate1,
borderColor: 'rgb(75, 192, 192)',
tension: 0.1
}, {
label: 'Pass Rate 2',
data: passRate2,
borderColor: 'rgb(255, 99, 132)',
tension: 0.1
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
scales: {
y: {
beginAtZero: true,
title: {
display: true,
text: 'Pass Rate (%)'
}
},
x: {
title: {
display: true,
text: 'Date'
}
}
},
plugins: {
title: {
display: true,
text: 'Claude 3.5 Sonnet Performance Over Time'
}
}
}
});
});
</script>
This graph shows the performance of Claude 3.5 Sonnet on the aider code editing benchmark over time. 'Pass Rate 1' is the success rate on the first attempt, while 'Pass Rate 2' is the success rate after a second attempt. As you can see, there's no significant decline in performance, suggesting that Sonnet's capabilities have remained stable since its launch.