feat: Add support for using two models to complete each coding task

This commit is contained in:
Paul Gauthier 2024-09-26 10:18:03 -07:00 committed by Paul Gauthier (aider)
parent a4df572cfe
commit 975f35dfbc

View file

@ -19,6 +19,108 @@ Aider now has experimental support for using two models to complete each coding
Splitting up "code reasoning" and "code editing" has produced SOTA results on Splitting up "code reasoning" and "code editing" has produced SOTA results on
[aider's code editing benchmark](/docs/benchmarks.html#the-benchmark). [aider's code editing benchmark](/docs/benchmarks.html#the-benchmark).
<style>
/* Results table: full width, with adjacent cell borders collapsed. */
table {
  width: 100%;
  border-collapse: collapse;
}

/* Header cells: grey band, left-aligned text, thin rule underneath. */
th {
  background-color: #e2e2e2;
  padding: 8px;
  text-align: left;
  border-bottom: 1px solid #ddd;
}

/* Cells of rows marked "shaded": light grey fill with a rule on top. */
.shaded td {
  background-color: #f2f2f2;
  border-top: 1px solid #ccc;
}
</style>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
{% assign sorted_data = site.data.senior | sort: "pass_rate_2" | reverse %}
<canvas id="passRateChart" width="400" height="200"></canvas>
<script>
document.addEventListener("DOMContentLoaded", function() {
var ctx = document.getElementById('passRateChart').getContext('2d');
var labels = [];
var data = [];
var colorMapping = {
"claude-3.5-sonnet": "rgba(75, 192, 192, 0.2)",
"o1-mini": "rgba(255, 99, 132, 0.2)",
"gpt-4o": "rgba(54, 162, 235, 0.2)",
"o1-preview": "rgba(255, 206, 86, 0.2)"
};
var borderColorMapping = {
"claude-3.5-sonnet": "rgba(75, 192, 192, 1)",
"o1-mini": "rgba(255, 99, 132, 1)",
"gpt-4o": "rgba(54, 162, 235, 1)",
"o1-preview": "rgba(255, 206, 86, 1)"
};
var backgroundColors = [];
var borderColors = [];
{% assign grouped_data = sorted_data | group_by: "model" %}
{% for group in grouped_data %}
{% for item in group.items %}
labels.push("{{ item.junior_model | default: "(No Junior)" }} {{ item.junior_edit_format | default: item.edit_format }}");
data.push({{ item.pass_rate_2 }});
backgroundColors.push(colorMapping["{{ item.model }}"]);
borderColors.push(borderColorMapping["{{ item.model }}"]);
{% endfor %}
{% endfor %}
new Chart(ctx, {
type: 'bar',
data: {
labels: labels,
datasets: [{
label: 'Pass Rate',
data: data,
backgroundColor: 'rgba(75, 192, 192, 0.2)',
borderColor: 'rgba(75, 192, 192, 1)',
borderWidth: 1,
backgroundColor: backgroundColors,
borderColor: borderColors
}]
},
options: {
scales: {
y: {
beginAtZero: true,
title: {
display: true,
text: 'Pass Rate (%)'
}
}
},
plugins: {
legend: {
display: true,
labels: {
generateLabels: function(chart) {
var colorMapping = {
"o1-preview": "rgba(255, 206, 86, 0.2)",
"claude-3.5-sonnet": "rgba(75, 192, 192, 0.2)",
"gpt-4o": "rgba(54, 162, 235, 0.2)",
"o1-mini": "rgba(255, 99, 132, 0.2)"
};
return Object.keys(colorMapping).map(function(key) {
return {
text: key,
fillStyle: colorMapping[key],
strokeStyle: colorMapping[key].replace('0.2', '1'),
lineWidth: 1
};
});
}
}
}
}
}});
});
</script>
## Motivation ## Motivation
This approach was motivated by OpenAI's recently released o1 models. This approach was motivated by OpenAI's recently released o1 models.
@ -89,106 +191,6 @@ aider --o1-preview --senior
## Full results ## Full results
<style>
/* Results table: full width, with adjacent cell borders collapsed. */
table {
  width: 100%;
  border-collapse: collapse;
}

/* Header cells: grey band, left-aligned text, thin rule underneath. */
th {
  background-color: #e2e2e2;
  padding: 8px;
  text-align: left;
  border-bottom: 1px solid #ddd;
}

/* Cells of rows marked "shaded": light grey fill with a rule on top. */
.shaded td {
  background-color: #f2f2f2;
  border-top: 1px solid #ccc;
}
</style>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
{% assign sorted_data = site.data.senior | sort: "pass_rate_2" | reverse %}
<canvas id="passRateChart" width="400" height="200"></canvas>
<script>
document.addEventListener("DOMContentLoaded", function() {
var ctx = document.getElementById('passRateChart').getContext('2d');
var labels = [];
var data = [];
var colorMapping = {
"claude-3.5-sonnet": "rgba(75, 192, 192, 0.2)",
"o1-mini": "rgba(255, 99, 132, 0.2)",
"gpt-4o": "rgba(54, 162, 235, 0.2)",
"o1-preview": "rgba(255, 206, 86, 0.2)"
};
var borderColorMapping = {
"claude-3.5-sonnet": "rgba(75, 192, 192, 1)",
"o1-mini": "rgba(255, 99, 132, 1)",
"gpt-4o": "rgba(54, 162, 235, 1)",
"o1-preview": "rgba(255, 206, 86, 1)"
};
var backgroundColors = [];
var borderColors = [];
{% assign grouped_data = sorted_data | group_by: "model" %}
{% for group in grouped_data %}
{% for item in group.items %}
labels.push("{{ item.model }} - {{ item.junior_model }}");
data.push({{ item.pass_rate_2 }});
backgroundColors.push(colorMapping["{{ item.model }}"]);
borderColors.push(borderColorMapping["{{ item.model }}"]);
{% endfor %}
{% endfor %}
new Chart(ctx, {
type: 'bar',
data: {
labels: labels,
datasets: [{
label: 'Pass Rate',
data: data,
backgroundColor: 'rgba(75, 192, 192, 0.2)',
borderColor: 'rgba(75, 192, 192, 1)',
borderWidth: 1,
backgroundColor: backgroundColors,
borderColor: borderColors
}]
},
options: {
scales: {
y: {
beginAtZero: true,
title: {
display: true,
text: 'Pass Rate (%)'
}
}
},
plugins: {
legend: {
display: true,
labels: {
generateLabels: function(chart) {
var colorMapping = {
"claude-3.5-sonnet": "rgba(75, 192, 192, 0.2)",
"o1-mini": "rgba(255, 99, 132, 0.2)",
"gpt-4o": "rgba(54, 162, 235, 0.2)",
"o1-preview": "rgba(255, 206, 86, 0.2)"
};
return Object.keys(colorMapping).map(function(key) {
return {
text: key,
fillStyle: colorMapping[key],
strokeStyle: colorMapping[key].replace('0.2', '1'),
lineWidth: 1
};
});
}
}
}
}
});
});
</script>
<table> <table>
<thead> <thead>