mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-31 09:44:59 +00:00
feat: Handle new exercise dir layout and add language info
This commit is contained in:
parent
0a3e0665ab
commit
6d74a564e6
1 changed file with 27 additions and 8 deletions
|
@ -23,9 +23,13 @@ def load_results(dirname):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
all_results = []
|
all_results = []
|
||||||
for fname in benchmark_dir.glob("*/.aider.results.json"):
|
# Look in language subdirectories under exercises/practice
|
||||||
|
for fname in benchmark_dir.glob("*/exercises/practice/*/.aider.results.json"):
|
||||||
try:
|
try:
|
||||||
results = json.loads(fname.read_text())
|
results = json.loads(fname.read_text())
|
||||||
|
# Add language info to results
|
||||||
|
lang = fname.parts[-4] # Get language from path
|
||||||
|
results['language'] = lang
|
||||||
all_results.append(results)
|
all_results.append(results)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
print(f"Failed to parse {fname}")
|
print(f"Failed to parse {fname}")
|
||||||
|
@ -107,17 +111,32 @@ def analyze_exercise_solutions(dirs=None, topn=None):
|
||||||
if exercise not in exercise_solutions:
|
if exercise not in exercise_solutions:
|
||||||
exercise_solutions[exercise] = []
|
exercise_solutions[exercise] = []
|
||||||
|
|
||||||
# Sort by number of models that solved each exercise
|
# Group exercises by language
|
||||||
sorted_exercises = sorted(exercise_solutions.items(), key=lambda x: len(x[1]), reverse=True)
|
by_language = defaultdict(list)
|
||||||
|
for testcase in all_exercises:
|
||||||
|
# Find language for this testcase from results
|
||||||
|
lang = next((r['language'] for r in next(iter(valid_entries))[1] if r['testcase'] == testcase), 'unknown')
|
||||||
|
by_language[lang].append(testcase)
|
||||||
|
|
||||||
# Calculate max length for alignment
|
# Sort languages
|
||||||
|
sorted_languages = sorted(by_language.keys())
|
||||||
|
|
||||||
|
# Calculate max lengths for alignment
|
||||||
max_name_len = max(len(testcase) for testcase in all_exercises)
|
max_name_len = max(len(testcase) for testcase in all_exercises)
|
||||||
|
max_lang_len = max(len(lang) for lang in sorted_languages)
|
||||||
total_models = len(valid_entries)
|
total_models = len(valid_entries)
|
||||||
|
|
||||||
for i, (testcase, models) in enumerate(sorted_exercises, 1):
|
# Print exercises grouped by language
|
||||||
num_solved = len(models)
|
for lang in sorted_languages:
|
||||||
percent = (num_solved / total_models) * 100
|
print(f"\n{lang.upper()}:")
|
||||||
print(f"{i:>3}. {testcase:<{max_name_len}} : {num_solved:>3} solved ({percent:>5.1f}%)")
|
lang_exercises = [(ex, exercise_solutions[ex]) for ex in by_language[lang]]
|
||||||
|
# Sort by number of models that solved each exercise
|
||||||
|
lang_exercises.sort(key=lambda x: len(x[1]), reverse=True)
|
||||||
|
|
||||||
|
for i, (testcase, models) in enumerate(lang_exercises, 1):
|
||||||
|
num_solved = len(models)
|
||||||
|
percent = (num_solved / total_models) * 100
|
||||||
|
print(f"{i:>3}. {testcase:<{max_name_len}} : {num_solved:>3} solved ({percent:>5.1f}%)")
|
||||||
|
|
||||||
print("\nSummary:")
|
print("\nSummary:")
|
||||||
solved_at_least_once = len([ex for ex, models in exercise_solutions.items() if models])
|
solved_at_least_once = len([ex for ex, models in exercise_solutions.items() if models])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue