add deepseek v3

This commit is contained in:
Paul Gauthier 2024-12-25 09:01:43 -05:00
parent dd9b2a872c
commit ec2da0a399
3 changed files with 32 additions and 3 deletions

View file

@ -104,7 +104,7 @@
- dirname: 2024-12-21-20-56-21--polyglot-deepseek-diff
test_cases: 225
model: deepseek-chat
model: DeepSeek Chat V2.5
edit_format: diff
commit_hash: a755079-dirty
pass_rate_1: 5.3

View file

@ -104,7 +104,7 @@
- dirname: 2024-12-21-20-56-21--polyglot-deepseek-diff
test_cases: 225
model: deepseek-chat
model: DeepSeek Chat V2.5
edit_format: diff
commit_hash: a755079-dirty
pass_rate_1: 5.3
@ -256,4 +256,30 @@
date: 2024-12-22
versions: 0.69.2.dev
seconds_per_case: 12.2
total_cost: 0.0000
total_cost: 0.0000
- dirname: 2024-12-25-13-31-51--deepseekv3preview-diff2
test_cases: 225
model: DeepSeek Chat V3 Preview
edit_format: diff
commit_hash: 0a23c4a-dirty
pass_rate_1: 22.7
pass_rate_2: 48.4
pass_num_1: 51
pass_num_2: 109
percent_cases_well_formed: 98.7
error_outputs: 7
num_malformed_responses: 7
num_with_malformed_responses: 3
user_asks: 19
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
exhausted_context_windows: 0
test_timeouts: 8
total_tests: 225
command: aider --model deepseek/deepseek-chat
date: 2024-12-25
versions: 0.69.2.dev
seconds_per_case: 34.8
total_cost: 0.3369

View file

@ -68,12 +68,15 @@ The model also has to successfully apply all its changes to the source file with
</tbody>
</table>
### Aider polyglot benchmark results
<canvas id="editChart" width="800" height="450" style="margin-top: 20px"></canvas>
<script src="https://unpkg.com/patternomaly/dist/patternomaly.js"></script>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script>
{% assign data_source = edit_sorted %}
{% assign pass_rate_field = "pass_rate_2" %}
{% assign highlight_model = "xxxxxxxxxxx" %}
{% include leaderboard.js %}
</script>
<style>