fix: Handle results files with a missing testcase key or invalid JSON

This commit is contained in:
Paul Gauthier 2024-12-19 15:49:12 -08:00 committed by Paul Gauthier (aider)
parent bb711fe255
commit 7eb7533d42

View file

@ -32,19 +32,25 @@ def load_results(dirname):
# Look in language subdirectories under exercises/practice # Look in language subdirectories under exercises/practice
for fname in benchmark_dir.glob("*/exercises/practice/*/.aider.results.json"): for fname in benchmark_dir.glob("*/exercises/practice/*/.aider.results.json"):
error = False
try: try:
results = json.loads(fname.read_text()) results = json.loads(fname.read_text())
# Add language info to results error = 'testcase' not in results
lang = fname.parts[-5] # Get language from path if not error:
results["language"] = lang # Add language info to results
all_results.append(results) lang = fname.parts[-5] # Get language from path
results["language"] = lang
all_results.append(results)
except json.JSONDecodeError: except json.JSONDecodeError:
error = True
if error:
# Track the parse error for this exercise/model combination # Track the parse error for this exercise/model combination
lang = fname.parts[-5] lang = fname.parts[-5]
exercise = f"{fname.parts[-2]}/{lang}" # Use directory name as testcase exercise = f"{fname.parts[-2]}/{lang}" # Use directory name as testcase
parse_errors.append(exercise) parse_errors.append(exercise)
print(f"Failed to parse {fname}") print(f"Bad results file {fname}")
continue continue
return all_results, parse_errors return all_results, parse_errors
@ -105,7 +111,7 @@ def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):
try: try:
all_exercises.add(result["testcase"] + "/" + result["language"]) all_exercises.add(result["testcase"] + "/" + result["language"])
except KeyError: except KeyError:
print(f"Warning: Missing testcase in {dirname}") print(f"Warning: Missing testcase in {dirname}", json.dumps(result, indent=4))
for (dirname, model), results, _ in valid_entries: for (dirname, model), results, _ in valid_entries:
if not results: if not results:
@ -224,6 +230,9 @@ def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):
} }
print(f"Total hard set exercises: {len(hard_set)}") print(f"Total hard set exercises: {len(hard_set)}")
dump(disqualified_exercises)
dump(hard_set)
# Count total problems, unsolved problems, and hard set problems by language # Count total problems, unsolved problems, and hard set problems by language
lang_totals = defaultdict(int) lang_totals = defaultdict(int)
lang_unsolved = defaultdict(int) lang_unsolved = defaultdict(int)