mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-01 10:14:59 +00:00
feat: Add a graph that displays the Pass Rate for each Senior/Junior/EditFormat
This commit is contained in:
parent
222b9cff09
commit
63a4ce0fe2
1 changed files with 76 additions and 1 deletions
|
@ -42,10 +42,85 @@ pair programming AI coding experience.
|
|||
|
||||
## Results
|
||||
|
||||
The graph above and table below show the
|
||||
The graph and table below show the
|
||||
[aider's code editing benchmark](/docs/benchmarks.html#the-benchmark)
|
||||
score for various combinations of Senior and Junior models.
|
||||
|
||||
<div>
|
||||
<canvas id="seniorJuniorChart"></canvas>
|
||||
</div>
|
||||
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const ctx = document.getElementById('seniorJuniorChart').getContext('2d');
|
||||
|
||||
const data = {
|
||||
labels: [],
|
||||
datasets: []
|
||||
};
|
||||
|
||||
{% assign sorted_data = site.data.senior | sort: "pass_rate_2" | reverse %}
|
||||
{% assign grouped_data = sorted_data | group_by: "model" %}
|
||||
|
||||
{% for group in grouped_data %}
|
||||
const dataset = {
|
||||
label: '{{ group.name }}',
|
||||
data: [],
|
||||
backgroundColor: getRandomColor(),
|
||||
};
|
||||
|
||||
{% for item in group.items %}
|
||||
data.labels.push('{{ item.junior_model }} ({{ item.junior_edit_format | default: item.edit_format }})');
|
||||
dataset.data.push({{ item.pass_rate_2 }});
|
||||
{% endfor %}
|
||||
|
||||
data.datasets.push(dataset);
|
||||
{% endfor %}
|
||||
|
||||
new Chart(ctx, {
|
||||
type: 'bar',
|
||||
data: data,
|
||||
options: {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
plugins: {
|
||||
title: {
|
||||
display: true,
|
||||
text: 'Pass Rate for Senior/Junior/EditFormat Combinations'
|
||||
},
|
||||
legend: {
|
||||
position: 'top',
|
||||
}
|
||||
},
|
||||
scales: {
|
||||
x: {
|
||||
stacked: true,
|
||||
},
|
||||
y: {
|
||||
stacked: true,
|
||||
beginAtZero: true,
|
||||
max: 100,
|
||||
title: {
|
||||
display: true,
|
||||
text: 'Pass Rate (%)'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Builds a random CSS hex colour.
 *
 * @returns {string} A string of the form `#RRGGBB`, made of six uppercase
 *   hexadecimal digits each picked independently with Math.random().
 */
function getRandomColor() {
  const hexDigits = '0123456789ABCDEF';
  // One Math.random() draw per digit, generated left to right.
  const picked = Array.from({ length: 6 }, () => {
    const position = Math.floor(Math.random() * 16);
    return hexDigits[position];
  });
  return `#${picked.join('')}`;
}
|
||||
</script>
|
||||
|
||||
Some noteworthy observations:
|
||||
|
||||
- o1-preview with Deepseek as the Junior surprises as the SOTA result, beating other stronger Junior models. This result is obtained with Deepseek using the "whole" editing format, requiring it to output a full updated copy of each edited source file. This is quite slow, and so probably not practical for interactive use with aider.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue