Mirror of https://github.com/Aider-AI/aider.git, synced 2025-05-28 16:25:00 +00:00
style: Run linter on benchmark script
parent 9cc674c283
commit 7bfc2e0e74

1 changed file with 21 additions and 21 deletions
@@ -1,9 +1,11 @@
 #!/usr/bin/env python

-import yaml
-from pathlib import Path
-from collections import defaultdict
 import json
+from collections import defaultdict
+from pathlib import Path
+
+import yaml
+

 def load_results(dirname):
     """Load all result files from a benchmark directory"""
@@ -11,7 +13,7 @@ def load_results(dirname):
     benchmark_dir = Path("tmp.benchmarks") / dirname
     if not benchmark_dir.exists():
         return None

     all_results = []
     for fname in benchmark_dir.glob("*/.aider.results.json"):
         try:
@@ -22,61 +24,59 @@ def load_results(dirname):
             continue
     return all_results


 def analyze_exercise_solutions():
     # Load the leaderboard data
     with open("aider/website/_data/edit_leaderboard.yml") as f:
         leaderboard = yaml.safe_load(f)

     # Track which models solved each exercise
     exercise_solutions = defaultdict(list)

     for entry in leaderboard:
         dirname = entry["dirname"]
         model = entry["model"]

         results = load_results(dirname)
         if not results:
             print(f"Could not load results for {dirname}")
             continue

         for result in results:
             testcase = result.get("testcase")
             if not testcase:
                 continue

             # Consider it solved if the last test attempt passed
             tests_outcomes = result.get("tests_outcomes", [])
             if tests_outcomes and tests_outcomes[-1]:
                 exercise_solutions[testcase].append(model)

     # Print statistics
     print("\nExercise Solution Statistics:")
     print("-" * 40)

     # Sort by number of models that solved each exercise
-    sorted_exercises = sorted(
-        exercise_solutions.items(),
-        key=lambda x: len(x[1]),
-        reverse=True
-    )
+    sorted_exercises = sorted(exercise_solutions.items(), key=lambda x: len(x[1]), reverse=True)

     for testcase, models in sorted_exercises:
         print(f"{testcase}: solved by {len(models)} models")
-        #print(f" Models: {', '.join(models)}")
+        # print(f" Models: {', '.join(models)}")

     print("\nSummary:")
     print(f"Total exercises solved at least once: {len(exercise_solutions)}")
     never_solved = 133 - len(exercise_solutions)
     print(f"Never solved by any model: {never_solved}")

     # Distribution of solutions
     solved_by_counts = defaultdict(int)
     for models in exercise_solutions.values():
         solved_by_counts[len(models)] += 1

     print("\nDistribution of solutions:")
     for count in sorted(solved_by_counts.keys()):
         print(f"Solved by {count} models: {solved_by_counts[count]} exercises")


 if __name__ == "__main__":
     analyze_exercise_solutions()
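
The hunks above are the kind of churn an import sorter plus a code formatter produce: the imports get regrouped, and the multi-line sorted() call is collapsed onto one line once it fits within the configured line length. Which tools this commit actually ran is not recorded on this page, so the sketch below is a rough illustration only; isort and black, the sample source string, and the 100-character line length are assumptions rather than values taken from the commit.

# Rough illustration, not part of the commit: reproduce this style of change
# programmatically. Tool choice (isort + black), the sample source, and the
# line length are assumptions.
import black
import isort

sample = (
    "import yaml\n"
    "from pathlib import Path\n"
    "from collections import defaultdict\n"
    "import json\n"
)

sorted_src = isort.code(sample)  # regroups imports: stdlib first, third-party after
formatted = black.format_str(sorted_src, mode=black.Mode(line_length=100))
print(formatted)

Run directly, this prints the imports regrouped into a standard-library block followed by a third-party block, mirroring the first hunk of the diff.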