Merge branch 'main' into polyglot-qwen2.5-coder-32b-instruct-whole-results

This commit is contained in:
paul-gauthier 2025-01-03 09:35:14 -04:00 committed by GitHub
commit 42f6c20ada
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 1349 additions and 1103 deletions

View file

@ -23,11 +23,19 @@ cog.out(text)
]]]-->
### main branch
### Aider v0.70.0
- Full support for o1 models.
- Watch files now honors `--subtree-only`, and only watches that subtree.
- Improved prompting for watch files, to work more reliably with more models.
- New install methods via uv, including one-liners.
- Support for openrouter/deepseek/deepseek-chat model.
- Better error handling when interactive commands are attempted via `/load` or `--load`.
- Display read-only files with abs path if it's shorter than rel path.
- Ask 10% of users to opt-in to analytics.
- Aider wrote 75% of the code in this release.
- Bugfix for auto-suggest.
- Gracefully handle unicode errors in git path names.
- Aider wrote 74% of the code in this release.
### Aider v0.69.1

View file

@ -3457,3 +3457,91 @@
Paul Gauthier (aider): 207
start_tag: v0.68.0
total_lines: 305
- aider_percentage: 74.22
aider_total: 875
end_date: '2024-12-26'
end_tag: v0.70.0
file_counts:
aider/__init__.py:
Paul Gauthier: 1
aider/analytics.py:
Paul Gauthier: 6
Paul Gauthier (aider): 41
aider/args.py:
Evan Johnson: 2
aider/coders/search_replace.py:
Paul Gauthier: 5
aider/commands.py:
Paul Gauthier (aider): 41
aider/help_pats.py:
Paul Gauthier: 3
aider/io.py:
Paul Gauthier: 7
Paul Gauthier (aider): 9
aider/main.py:
Paul Gauthier: 15
Paul Gauthier (aider): 5
apaz-cli: 3
mdk: 6
aider/models.py:
Paul Gauthier: 29
aider/repo.py:
Paul Gauthier: 14
aider/utils.py:
Paul Gauthier: 2
aider/watch.py:
Paul Gauthier: 13
aider/website/_includes/head_custom.html:
Paul Gauthier (aider): 4
aider/website/_includes/leaderboard.js:
Paul Gauthier (aider): 14
aider/website/docs/leaderboards/index.md:
Paul Gauthier: 28
Paul Gauthier (aider): 2
benchmark/Dockerfile:
Paul Gauthier: 8
Paul Gauthier (aider): 43
benchmark/benchmark.py:
Paul Gauthier: 69
Paul Gauthier (aider): 153
benchmark/clone-exercism.sh:
Paul Gauthier: 2
Paul Gauthier (aider): 18
benchmark/cpp-test.sh:
Paul Gauthier: 10
Paul Gauthier (aider): 1
benchmark/docker.sh:
Paul Gauthier (aider): 4
benchmark/install-docker-ubuntu.sh:
Paul Gauthier (aider): 63
benchmark/npm-test.sh:
Paul Gauthier: 10
Paul Gauthier (aider): 3
benchmark/problem_stats.py:
Paul Gauthier: 35
Paul Gauthier (aider): 318
benchmark/rsync.sh:
Paul Gauthier: 7
Paul Gauthier (aider): 26
scripts/blame.py:
Paul Gauthier (aider): 6
scripts/my_models.py:
Paul Gauthier (aider): 95
scripts/update-blame.sh:
Paul Gauthier (aider): 3
scripts/update-docs.sh:
Paul Gauthier: 1
tests/basic/test_analytics.py:
Paul Gauthier (aider): 19
tests/basic/test_main.py:
Paul Gauthier (aider): 7
tests/basic/test_sanity_check_repo.py:
mdk: 28
grand_total:
Evan Johnson: 2
Paul Gauthier: 265
Paul Gauthier (aider): 875
apaz-cli: 3
mdk: 34
start_tag: v0.69.0
total_lines: 1179

View file

@ -257,10 +257,36 @@
versions: 0.69.2.dev
seconds_per_case: 12.2
total_cost: 0.0000
- dirname: 2024-12-23-01-11-56--yi-test
test_cases: 225
model: yi-lightning
edit_format: whole
commit_hash: 2b1625e
pass_rate_1: 5.8
pass_rate_2: 12.9
pass_num_1: 13
pass_num_2: 29
percent_cases_well_formed: 92.9
error_outputs: 87
num_malformed_responses: 72
num_with_malformed_responses: 16
user_asks: 107
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 1
test_timeouts: 6
total_tests: 225
command: aider --model openai/yi-lightning
date: 2024-12-23
versions: 0.69.2.dev
seconds_per_case: 146.7
total_cost: 0.0000
- dirname: 2024-12-25-13-31-51--deepseekv3preview-diff2
test_cases: 225
model: DeepSeek Chat V3 Preview
model: DeepSeek Chat V3
edit_format: diff
commit_hash: 0a23c4a-dirty
pass_rate_1: 22.7
@ -308,4 +334,5 @@
date: 2024-12-26
versions: 0.69.2.dev
seconds_per_case: 42.0
total_cost: 0.0000
total_cost: 0.0000

View file

@ -1,5 +1,18 @@
<canvas id="blameChart" width="800" height="360" style="margin-top: 20px"></canvas>
<canvas id="linesChart" width="800" height="360" style="margin-top: 20px"></canvas>
<div class="chart-container">
<canvas id="blameChart" style="margin-top: 20px"></canvas>
</div>
<div class="chart-container">
<canvas id="linesChart" style="margin-top: 20px"></canvas>
</div>
<style>
.chart-container {
position: relative;
width: 100%;
height: 300px;
}
</style>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://cdn.jsdelivr.net/npm/moment"></script>
<script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-moment"></script>
@ -24,10 +37,17 @@ document.addEventListener('DOMContentLoaded', function () {
var linesData = {
labels: labels,
datasets: [{
label: 'Aider\'s lines of new code',
label: 'Aider',
data: [{% for row in site.data.blame %}{ x: '{{ row.end_tag }}', y: {{ row.aider_total }} },{% endfor %}],
backgroundColor: 'rgba(255, 99, 132, 0.8)',
borderColor: 'rgba(255, 99, 132, 1)',
backgroundColor: 'rgba(54, 162, 235, 0.8)',
borderColor: 'rgba(54, 162, 235, 1)',
borderWidth: 1
},
{
label: 'Human',
data: [{% for row in site.data.blame %}{ x: '{{ row.end_tag }}', y: {{ row.total_lines | minus: row.aider_total }} },{% endfor %}],
backgroundColor: 'rgba(200, 200, 200, 0.8)',
borderColor: 'rgba(200, 200, 200, 1)',
borderWidth: 1
}]
};
@ -36,6 +56,7 @@ document.addEventListener('DOMContentLoaded', function () {
type: 'bar',
data: blameData,
options: {
maintainAspectRatio: false,
scales: {
x: {
type: 'category',
@ -85,9 +106,11 @@ document.addEventListener('DOMContentLoaded', function () {
type: 'bar',
data: linesData,
options: {
maintainAspectRatio: false,
scales: {
x: {
type: 'category',
stacked: true,
title: {
display: true,
text: 'Version'
@ -98,6 +121,7 @@ document.addEventListener('DOMContentLoaded', function () {
}
},
y: {
stacked: true,
title: {
display: true,
text: 'Lines of new code'
@ -107,12 +131,14 @@ document.addEventListener('DOMContentLoaded', function () {
},
plugins: {
legend: {
display: false
display: true,
position: 'chartArea',
reverse: true
},
tooltip: {
callbacks: {
label: function(context) {
var label = 'New lines of code by aider';
var label = context.dataset.label;
var value = context.parsed.y || 0;
return `${label}: ${value}`;
}
@ -120,7 +146,7 @@ document.addEventListener('DOMContentLoaded', function () {
},
title: {
display: true,
text: 'Lines of new code written by aider, by release',
text: 'Lines of new code, by release',
font: {
size: 16
}

View file

@ -1,2 +1 @@
Aider works best with GPT-4o & Claude 3.5 Sonnet and can
[connect to almost any LLM](https://aider.chat/docs/llms.html).
Aider works best with Claude 3.5 Sonnet, DeepSeek V3, o1 & GPT-4o and can [connect to almost any LLM](https://aider.chat/docs/llms.html).

File diff suppressed because it is too large Load diff

View file

@ -44,7 +44,7 @@
## Use gpt-3.5-turbo model for the main chat
#35turbo: false
## Use deepseek/deepseek-coder model for the main chat
## Use deepseek/deepseek-chat model for the main chat
#deepseek: false
## Use o1-mini model for the main chat

View file

@ -48,7 +48,7 @@
## Use gpt-3.5-turbo model for the main chat
#AIDER_35TURBO=
## Use deepseek/deepseek-coder model for the main chat
## Use deepseek/deepseek-chat model for the main chat
#AIDER_DEEPSEEK=
## Use o1-mini model for the main chat

View file

@ -96,7 +96,7 @@ cog.outl("```")
## Use gpt-3.5-turbo model for the main chat
#35turbo: false
## Use deepseek/deepseek-coder model for the main chat
## Use deepseek/deepseek-chat model for the main chat
#deepseek: false
## Use o1-mini model for the main chat

View file

@ -88,7 +88,7 @@ cog.outl("```")
## Use gpt-3.5-turbo model for the main chat
#AIDER_35TURBO=
## Use deepseek/deepseek-coder model for the main chat
## Use deepseek/deepseek-chat model for the main chat
#AIDER_DEEPSEEK=
## Use o1-mini model for the main chat

View file

@ -55,7 +55,7 @@ for alias, model in sorted(MODEL_ALIASES.items()):
- `4`: gpt-4-0613
- `4-turbo`: gpt-4-1106-preview
- `4o`: gpt-4o
- `deepseek`: deepseek/deepseek-coder
- `deepseek`: deepseek/deepseek-chat
- `flash`: gemini/gemini-2.0-flash-exp
- `haiku`: claude-3-5-haiku-20241022
- `opus`: claude-3-opus-20240229

View file

@ -136,7 +136,7 @@ Aliases:
- `-3`
### `--deepseek`
Use deepseek/deepseek-coder model for the main chat
Use deepseek/deepseek-chat model for the main chat
Environment variable: `AIDER_DEEPSEEK`
### `--o1-mini`

View file

@ -237,16 +237,12 @@ tr:hover { background-color: #f5f5f5; }
</style>
<table>
<tr><th>Model Name</th><th class='right'>Total Tokens</th><th class='right'>Percent</th></tr>
<tr><td>claude-3-5-sonnet-20241022</td><td class='right'>1,918,086</td><td class='right'>98.0%</td></tr>
<tr><td>o1-preview</td><td class='right'>33,530</td><td class='right'>1.7%</td></tr>
<tr><td>gpt-4o</td><td class='right'>4,273</td><td class='right'>0.2%</td></tr>
<tr><td>openrouter/REDACTED</td><td class='right'>1,234</td><td class='right'>0.1%</td></tr>
<tr><td>openai/gpt-4o-mini</td><td class='right'>141</td><td class='right'>0.0%</td></tr>
<tr><td>deepseek/deepseek-chat</td><td class='right'>1,422,360</td><td class='right'>62.0%</td></tr>
<tr><td>claude-3-5-sonnet-20241022</td><td class='right'>823,708</td><td class='right'>35.9%</td></tr>
<tr><td>o1</td><td class='right'>37,290</td><td class='right'>1.6%</td></tr>
<tr><td>gemini/gemini-2.0-flash-exp</td><td class='right'>4,827</td><td class='right'>0.2%</td></tr>
<tr><td>gpt-4o</td><td class='right'>4,473</td><td class='right'>0.2%</td></tr>
</table>
{: .note :}
Some models show as REDACTED, because they are new or unpopular models.
Aider's analytics only records the names of "well known" LLMs.
<!--[[[end]]]-->
## How are the "aider wrote xx% of code" stats computed?

View file

@ -20,8 +20,8 @@ it works best with models that score well on the benchmarks.
{: .note :}
The old
[aider code editing leaderboard](edit.html)
The
[original aider code editing leaderboard](edit.html)
has been replaced by this
new, much more challenging
[polyglot leaderboard](https://aider.chat/2024/12/21/polyglot.html).
@ -122,6 +122,6 @@ mod_dates = [get_last_modified_date(file) for file in files]
latest_mod_date = max(mod_dates)
cog.out(f"{latest_mod_date.strftime('%B %d, %Y.')}")
]]]-->
December 22, 2024.
December 26, 2024.
<!--[[[end]]]-->
</p>

View file

@ -53,6 +53,12 @@ To use aider installed via `pipx` with AWS Bedrock, you must add the `boto3` dep
pipx inject aider-chat boto3
```
If you installed aider via the one-liner or uv, you must add the `boto3` dependency to aider's virtual environment by running
```bash
uv tool run --from aider-chat pip install boto3
```
## Running Aider with Bedrock

View file

@ -95,6 +95,8 @@ cog.out(''.join(lines))
- TOGETHERAI_API_KEY
- VOLCENGINE_API_KEY
- VOYAGE_API_KEY
- WATSONX_API_KEY
- WX_API_KEY
- XAI_API_KEY
- XINFERENCE_API_KEY
<!--[[[end]]]-->

View file

@ -32,9 +32,8 @@ cog.out(text)
Aider lets you pair program with LLMs,
to edit code in your local git repository.
Start a new project or work with an existing git repo.
Aider works best with GPT-4o & Claude 3.5 Sonnet and can
[connect to almost any LLM](https://aider.chat/docs/llms.html).
Start a new project or work with an existing code base.
{% include works-best.md %}
<!--
<p align="center">
@ -105,7 +104,7 @@ for more details.
- Aider will edit your files to complete your request.
- Aider [automatically git commits](https://aider.chat/docs/git.html) changes with a sensible commit message.
- Aider works with [most popular languages](https://aider.chat/docs/languages.html): python, javascript, typescript, php, html, css, and more...
- Aider works best with GPT-4o & Claude 3.5 Sonnet and can [connect to almost any LLM](https://aider.chat/docs/llms.html).
- {% capture included_content %}{% include works-best.md %}{% endcapture %}{{ included_content | strip_newlines | strip }}
- Aider can edit multiple files at once for complex requests.
- Aider uses a [map of your entire git repo](https://aider.chat/docs/repomap.html), which helps it work well in larger codebases.
- Edit files in your editor while chatting with aider,