move refac data to yml

This commit is contained in:
Paul Gauthier 2024-05-06 11:21:38 -07:00
parent a7b08c7354
commit e58ce69154
3 changed files with 45 additions and 10 deletions

View file

@ -1,6 +0,0 @@
model,second,first,format,command,version,commits,date
gpt-4-turbo-2024-04-09,0,34.1,udiff,aider --gpt-4-turbo,0.27.1-dev,b75fdb9,4/9/24
gpt-4-0125-preview,0,43.8,udiff,aider --model gpt-4-0125-preview,0.22.1-dev,0fbd702,1/25/24
gpt-4-1106-preview,0,57.3,udiff,aider --model gpt-4-1106-preview,0.22.1-dev,a75e7c8,1/25/24
claude-3-opus-20240229,0,67.4,diff,aider --opus,0.31.2-dev,b02320b-dirty,5/4/24
gemini/gemini-1.5-pro-latest,0.0,49.4,diff-fenced,aider --model gemini/gemini-1.5-pro-latest,0.31.2-dev,425cb29 1b35ca2-dirty a0649ba-dirty 3e4fca2-dirty,2024-05-04
1 model second first format command version commits date
2 gpt-4-turbo-2024-04-09 0 34.1 udiff aider --gpt-4-turbo 0.27.1-dev b75fdb9 4/9/24
3 gpt-4-0125-preview 0 43.8 udiff aider --model gpt-4-0125-preview 0.22.1-dev 0fbd702 1/25/24
4 gpt-4-1106-preview 0 57.3 udiff aider --model gpt-4-1106-preview 0.22.1-dev a75e7c8 1/25/24
5 claude-3-opus-20240229 0 67.4 diff aider --opus 0.31.2-dev b02320b-dirty 5/4/24
6 gemini/gemini-1.5-pro-latest 0.0 49.4 diff-fenced aider --model gemini/gemini-1.5-pro-latest 0.31.2-dev 425cb29 1b35ca2-dirty a0649ba-dirty 3e4fca2-dirty 2024-05-04

View file

@ -0,0 +1,41 @@
- dirname: 2024-05-04-23-27-02--refac-gemini
test_cases: 89
model: gemini/gemini-1.5-pro-latest
edit_format: diff-fenced
commit_hash: a0649ba-dirty, 425cb29, 1b35ca2-dirty, 3e4fca2-dirty
pass_rate_1: 49.4
percent_cases_well_formed: 7.9
error_outputs: 247
num_malformed_responses: 82
user_asks: 0
lazy_comments: 4
syntax_errors: 0
indentation_errors: 8
exhausted_context_windows: 0
test_timeouts: 0
command: aider --model gemini/gemini-1.5-pro-latest
date: 2024-05-04
versions: 0.31.2-dev
seconds_per_case: 55.7
total_cost: 0.0000
- dirname: 2024-05-04-17-45-53--refac-opus
test_cases: 83
model: openrouter/anthropic/claude-3-opus
edit_format: diff
commit_hash: b02320b-dirty
pass_rate_1: 72.3
percent_cases_well_formed: 79.5
error_outputs: 51
num_malformed_responses: 17
user_asks: 0
lazy_comments: 2
syntax_errors: 1
indentation_errors: 3
exhausted_context_windows: 0
test_timeouts: 0
command: aider --model openrouter/anthropic/claude-3-opus
date: 2024-05-04
versions: 0.31.2-dev
seconds_per_case: 67.8
total_cost: 27.9176

View file

@ -99,13 +99,13 @@ Therefore, results are available for fewer models.
</tr>
</thead>
<tbody>
{% assign refac_sorted = site.data.refactor_leaderboard | sort: 'first' | reverse %}
{% assign refac_sorted = site.data.refactor_leaderboard | sort: 'pass_rate_1' | reverse %}
{% for row in refac_sorted %}
<tr style="border-bottom: 1px solid #ddd;">
<td style="padding: 8px;">{{ row.model }}</td>
<td style="padding: 8px; text-align: center;">{{ row.first }}%</td>
<td style="padding: 8px; text-align: center;">{{ row.pass_rate_1 }}%</td>
<td style="padding: 8px;"><code>{{ row.command }}</code></td>
<td style="padding: 8px; text-align: center;">{{ row.format }}</td>
<td style="padding: 8px; text-align: center;">{{ row.edit_format }}</td>
</tr>
{% endfor %}
</tbody>
@ -129,7 +129,7 @@ Therefore, results are available for fewer models.
{% for row in refac_sorted %}
leaderboardData.labels.push('{{ row.model }}');
leaderboardData.datasets[0].data.push({{ row.first }});
leaderboardData.datasets[0].data.push({{ row.pass_rate_1 }});
{% endfor %}
var leaderboardChart = new Chart(ctx, {