mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-01 18:25:00 +00:00
copy
This commit is contained in:
parent
7310f0928f
commit
b3ed2c8a48
2 changed files with 62 additions and 61 deletions
|
@ -1,3 +1,25 @@
|
||||||
|
- dirname: 2024-08-14-18-26-18--json-gpt-4o-2024-08-06-whole
|
||||||
|
test_cases: 133
|
||||||
|
model: gpt-4o-2024-08-06
|
||||||
|
edit_format: Markdown
|
||||||
|
commit_hash: 94a2601-dirty
|
||||||
|
pass_rate_1: 62.4
|
||||||
|
percent_cases_well_formed: 100.0
|
||||||
|
error_outputs: 0
|
||||||
|
num_malformed_responses: 0
|
||||||
|
num_with_malformed_responses: 0
|
||||||
|
user_asks: 0
|
||||||
|
lazy_comments: 0
|
||||||
|
syntax_errors: 0
|
||||||
|
indentation_errors: 0
|
||||||
|
exhausted_context_windows: 0
|
||||||
|
test_timeouts: 3
|
||||||
|
command: aider --model gpt-4o-2024-08-06
|
||||||
|
date: 2024-08-14
|
||||||
|
versions: 0.50.2-dev
|
||||||
|
seconds_per_case: 6.8
|
||||||
|
total_cost: 1.2717
|
||||||
|
|
||||||
- dirname: 2024-08-14-18-38-25--json-gpt-4o-2024-08-06-non-strict-func
|
- dirname: 2024-08-14-18-38-25--json-gpt-4o-2024-08-06-non-strict-func
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: gpt-4o-2024-08-06
|
model: gpt-4o-2024-08-06
|
||||||
|
@ -42,53 +64,9 @@
|
||||||
seconds_per_case: 12.7
|
seconds_per_case: 12.7
|
||||||
total_cost: 1.3652
|
total_cost: 1.3652
|
||||||
|
|
||||||
- dirname: 2024-08-14-18-26-18--json-gpt-4o-2024-08-06-whole
|
|
||||||
test_cases: 133
|
|
||||||
model: gpt-4o-2024-08-06
|
|
||||||
edit_format: Markdown
|
|
||||||
commit_hash: 94a2601-dirty
|
|
||||||
pass_rate_1: 62.4
|
|
||||||
percent_cases_well_formed: 100.0
|
|
||||||
error_outputs: 0
|
|
||||||
num_malformed_responses: 0
|
|
||||||
num_with_malformed_responses: 0
|
|
||||||
user_asks: 0
|
|
||||||
lazy_comments: 0
|
|
||||||
syntax_errors: 0
|
|
||||||
indentation_errors: 0
|
|
||||||
exhausted_context_windows: 0
|
|
||||||
test_timeouts: 3
|
|
||||||
command: aider --model gpt-4o-2024-08-06
|
|
||||||
date: 2024-08-14
|
|
||||||
versions: 0.50.2-dev
|
|
||||||
seconds_per_case: 6.8
|
|
||||||
total_cost: 1.2717
|
|
||||||
|
|
||||||
- dirname: 2024-08-14-20-19-23--json-sonnet-non-strict-func
|
|
||||||
test_cases: 133
|
|
||||||
model: openrouter/anthropic/claude-3.5-sonnet
|
|
||||||
edit_format: Tool call
|
|
||||||
commit_hash: e2f14a2
|
|
||||||
pass_rate_1: 52.6
|
|
||||||
percent_cases_well_formed: 100.0
|
|
||||||
error_outputs: 1
|
|
||||||
num_malformed_responses: 0
|
|
||||||
num_with_malformed_responses: 0
|
|
||||||
user_asks: 1
|
|
||||||
lazy_comments: 0
|
|
||||||
syntax_errors: 1
|
|
||||||
indentation_errors: 0
|
|
||||||
exhausted_context_windows: 0
|
|
||||||
test_timeouts: 0
|
|
||||||
command: aider --model openrouter/anthropic/claude-3.5-sonnet
|
|
||||||
date: 2024-08-14
|
|
||||||
versions: 0.50.2-dev
|
|
||||||
seconds_per_case: 18.9
|
|
||||||
total_cost: 2.6341
|
|
||||||
|
|
||||||
- dirname: 2024-08-14-20-15-19--json-sonnet-whole
|
- dirname: 2024-08-14-20-15-19--json-sonnet-whole
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: openrouter/anthropic/claude-3.5-sonnet
|
model: claude-3.5-sonnet
|
||||||
edit_format: Markdown
|
edit_format: Markdown
|
||||||
commit_hash: e2f14a2
|
commit_hash: e2f14a2
|
||||||
pass_rate_1: 58.6
|
pass_rate_1: 58.6
|
||||||
|
@ -102,37 +80,37 @@
|
||||||
indentation_errors: 0
|
indentation_errors: 0
|
||||||
exhausted_context_windows: 0
|
exhausted_context_windows: 0
|
||||||
test_timeouts: 0
|
test_timeouts: 0
|
||||||
command: aider --model openrouter/anthropic/claude-3.5-sonnet
|
command: aider --model claude-3.5-sonnet
|
||||||
date: 2024-08-14
|
date: 2024-08-14
|
||||||
versions: 0.50.2-dev
|
versions: 0.50.2-dev
|
||||||
seconds_per_case: 19.7
|
seconds_per_case: 19.7
|
||||||
total_cost: 2.5335
|
total_cost: 2.5335
|
||||||
|
|
||||||
- dirname: 2024-08-14-21-20-46--json-deepseek-non-strict-func
|
- dirname: 2024-08-14-20-19-23--json-sonnet-non-strict-func
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: openrouter/deepseek/deepseek-coder
|
model: claude-3.5-sonnet
|
||||||
edit_format: Tool call
|
edit_format: Tool call
|
||||||
commit_hash: e2f14a2
|
commit_hash: e2f14a2
|
||||||
pass_rate_1: 54.1
|
pass_rate_1: 52.6
|
||||||
percent_cases_well_formed: 100.0
|
percent_cases_well_formed: 100.0
|
||||||
error_outputs: 9
|
error_outputs: 1
|
||||||
num_malformed_responses: 0
|
num_malformed_responses: 0
|
||||||
num_with_malformed_responses: 0
|
num_with_malformed_responses: 0
|
||||||
user_asks: 5
|
user_asks: 1
|
||||||
lazy_comments: 0
|
lazy_comments: 0
|
||||||
syntax_errors: 2
|
syntax_errors: 1
|
||||||
indentation_errors: 0
|
indentation_errors: 0
|
||||||
exhausted_context_windows: 0
|
exhausted_context_windows: 0
|
||||||
test_timeouts: 0
|
test_timeouts: 0
|
||||||
command: aider --model openrouter/deepseek/deepseek-coder
|
command: aider --model claude-3.5-sonnet
|
||||||
date: 2024-08-14
|
date: 2024-08-14
|
||||||
versions: 0.50.2-dev
|
versions: 0.50.2-dev
|
||||||
seconds_per_case: 17.4
|
seconds_per_case: 18.9
|
||||||
total_cost: 0.0332
|
total_cost: 2.6341
|
||||||
|
|
||||||
- dirname: 2024-08-14-21-23-27--json-deepseek-whole
|
- dirname: 2024-08-14-21-23-27--json-deepseek-whole
|
||||||
test_cases: 133
|
test_cases: 133
|
||||||
model: openrouter/deepseek/deepseek-coder
|
model: deepseek-coder
|
||||||
edit_format: Markdown
|
edit_format: Markdown
|
||||||
commit_hash: e2f14a2
|
commit_hash: e2f14a2
|
||||||
pass_rate_1: 61.7
|
pass_rate_1: 61.7
|
||||||
|
@ -146,9 +124,31 @@
|
||||||
indentation_errors: 0
|
indentation_errors: 0
|
||||||
exhausted_context_windows: 0
|
exhausted_context_windows: 0
|
||||||
test_timeouts: 1
|
test_timeouts: 1
|
||||||
command: aider --model openrouter/deepseek/deepseek-coder
|
command: aider --model deepseek-coder
|
||||||
date: 2024-08-14
|
date: 2024-08-14
|
||||||
versions: 0.50.2-dev
|
versions: 0.50.2-dev
|
||||||
seconds_per_case: 23.0
|
seconds_per_case: 23.0
|
||||||
total_cost: 0.0439
|
total_cost: 0.0439
|
||||||
|
|
||||||
|
- dirname: 2024-08-14-21-20-46--json-deepseek-non-strict-func
|
||||||
|
test_cases: 133
|
||||||
|
model: deepseek-coder
|
||||||
|
edit_format: Tool call
|
||||||
|
commit_hash: e2f14a2
|
||||||
|
pass_rate_1: 54.1
|
||||||
|
percent_cases_well_formed: 100.0
|
||||||
|
error_outputs: 9
|
||||||
|
num_malformed_responses: 0
|
||||||
|
num_with_malformed_responses: 0
|
||||||
|
user_asks: 5
|
||||||
|
lazy_comments: 0
|
||||||
|
syntax_errors: 2
|
||||||
|
indentation_errors: 0
|
||||||
|
exhausted_context_windows: 0
|
||||||
|
test_timeouts: 0
|
||||||
|
command: aider --model deepseek-coder
|
||||||
|
date: 2024-08-14
|
||||||
|
versions: 0.50.2-dev
|
||||||
|
seconds_per_case: 17.4
|
||||||
|
total_cost: 0.0332
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,9 @@ nav_exclude: true
|
||||||
<p class="post-date">{{ page.date | date: "%B %d, %Y" }}</p>
|
<p class="post-date">{{ page.date | date: "%B %d, %Y" }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
# LLMs are bad at returning code in json
|
||||||
|
|
||||||
|
|
||||||
<canvas id="passRateChart" width="800" height="400" style="margin-bottom: 20px"></canvas>
|
<canvas id="passRateChart" width="800" height="400" style="margin-bottom: 20px"></canvas>
|
||||||
|
|
||||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||||
|
@ -55,13 +58,13 @@ document.addEventListener('DOMContentLoaded', function () {
|
||||||
display: true,
|
display: true,
|
||||||
text: 'Pass Rate (%)'
|
text: 'Pass Rate (%)'
|
||||||
},
|
},
|
||||||
max: 100
|
max: 70
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
plugins: {
|
plugins: {
|
||||||
title: {
|
title: {
|
||||||
display: true,
|
display: true,
|
||||||
text: 'Pass Rate by Model and Edit Format',
|
text: 'Pass rate by model and code return strategy',
|
||||||
font: {
|
font: {
|
||||||
size: 16
|
size: 16
|
||||||
}
|
}
|
||||||
|
@ -77,8 +80,6 @@ document.addEventListener('DOMContentLoaded', function () {
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
# LLMs are bad at returning code in json
|
|
||||||
|
|
||||||
|
|
||||||
A lot of people wonder why aider doesn't have LLMs use tools or function calls to
|
A lot of people wonder why aider doesn't have LLMs use tools or function calls to
|
||||||
specify code edits.
|
specify code edits.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue