diff --git a/_data/leaderboard.csv b/_data/leaderboard.csv index 169d843f7..e6fd8bdac 100644 --- a/_data/leaderboard.csv +++ b/_data/leaderboard.csv @@ -1,11 +1,16 @@ -model,second,first,format,command,version,commit,date +model,second,first,format,command,version,commits,date claude-3-opus-20240229,68.4,53.4,diff,aider --opus,0.30.1,f4b1797,5/2/24 -gpt-4-0613,67.7,46.6,diff,aider -4,0.18.1,3aa17c4,12/16/23 -gpt-4-turbo-2024-04-09,64.4,49.2,udiff,aider --gpt-4-turbo,0.30.1,e610e5b,5/1/24 -gemini-1.5-pro-latest,57.1,45.9,diff-fenced,aider --model gemini/gemini-1.5-pro-latest,0.32.0,5d32dd7,5/3/24 -gpt-3.5-turbo-1106,56.1,45.5,whole,aider --model gpt-3.5-turbo-1106,0.30.1,7b14d77,4/30/24 claude-3-sonnet-20240229,54.9,43.6,whole,aider --sonnet,0.25.0,a5f8076,3/6/24 +Command-R+,29.3,22.6,whole,aider --model command-r-plus,0.28.0,a06c927,4/20/24 Deepseek Coder,54.5,47,whole,aider --model openai/deepseek-coder,0.30.1,c07f793,4/29/24 +gemini-1.5-pro-latest,57.1,45.9,diff-fenced,aider --model gemini/gemini-1.5-pro-latest,0.32.0,5d32dd7,5/3/24 gpt-3.5-turbo-0125,49.6,39.8,whole,aider --35turbo,0.22.0,da14474,2/2/24 -Llama3 70B,49.2,38.6,diff,aider --model groq/llama3-70b-8192,0.32.0,b5bb453,5/3/24 -Command-R+,29.3,22.6,whole,aider --model command-r-plus,0.28.0,a06c927,4/20/24 \ No newline at end of file +gpt-3.5-turbo-0301,57.9,50.4,whole,aider --model gpt-3.5-turbo-0301,0.16.4-dev,44388db-dirty,11/6/23 +gpt-3.5-turbo-0613,50.4,38.3,whole,aider --model gpt-3.5-turbo-0613,0.16.4-dev,93aa497-dirty,11/7/23 +gpt-3.5-turbo-1106,56.1,45.5,whole,aider --model gpt-3.5-turbo-1106,0.30.1,7b14d77,4/30/24 +gpt-4-0125-preview,66.2,55.6,udiff,aider,0.22.1-dev,edcf9b1,1/25/24 +gpt-4-0314,68.4,51.1,diff,aider --model gpt-4-0314,0.7.2,506bfe2 c7c548d,7/1/23 +gpt-4-0613,67.7,46.6,diff,aider -4,0.18.1,3aa17c4,12/16/23 +gpt-4-1106-preview,63.2,57.1,udiff,aider,0.31.2-dev,1981105-dirty,5/4/24 +gpt-4-turbo-2024-04-09,64.4,49.2,diff,aider --gpt-4-turbo,0.30.1,e610e5b,5/1/24 +Llama3 70B,49.2,38.6,diff,aider --model groq/llama3-70b-8192,0.32.0,b5bb453,5/3/24 \ No newline at end of file diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index d1f5e82a5..2b6f453d2 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -870,26 +870,38 @@ def summarize_results(dirname): ) csv = [] - csv.append(' '.join(variants['model'])) - csv.append(f"{percents[1]:.1f}") - csv.append(f"{percents[0]:.1f}") - csv.append(' '.join(variants['edit_format'])) - csv.append('aider') - csv.append('version') - for hsh in variants['commit_hash']: + csv.append(" ".join(variants["model"])) + + second = percents.get(1, 0) + first = percents.get(0, 0) + csv.append(f"{second:.1f}") + csv.append(f"{first:.1f}") + + csv.append(" ".join(variants["edit_format"])) + csv.append("aider") + versions = set() + for hsh in variants["commit_hash"]: + if not hsh: + continue + hsh = hsh.split("-")[0] try: version = subprocess.check_output( - ['git', 'show', f'{hsh}:aider/__init__.py'], - universal_newlines=True + ["git", "show", f"{hsh}:aider/__init__.py"], universal_newlines=True ) version = re.search(r'__version__ = "(.*)"', version).group(1) - csv.append(version) + versions.add(version) except subprocess.CalledProcessError: - csv.append('unknown') - csv.append(' '.join(variants['commit_hash'])) + pass + csv.append(" ".join(sorted(versions))) + commit_hashes = variants.get("commit_hash", []) + if all(commit_hashes): + csv.append(" ".join(commit_hashes)) + else: + csv.append("") csv.append(dirname.name[:10]) - csv = ','.join(csv) + csv = ",".join(csv) print() + print("Add this to _data/leaderboard.csv:") print(csv) console.rule() diff --git a/docs/leaderboard.md b/docs/leaderboard.md index dc5f0e81b..3d63f9876 100644 --- a/docs/leaderboard.md +++ b/docs/leaderboard.md @@ -26,7 +26,8 @@ it will work best with models that score well on the code editing benchmark.
- {% for row in site.data.leaderboard %} + {% assign sorted = site.data.leaderboard | sort: 'second' | reverse %} + {% for row in sorted %}