mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 08:44:59 +00:00
feat: Add pass_num to benchmark results, fix hard set percent
This commit is contained in:
parent
6ddb8a7d88
commit
a915c60999
2 changed files with 10 additions and 4 deletions
|
@@ -506,6 +506,7 @@ def summarize_results(dirname):
|
|||
percents[i] = pass_rate
|
||||
# console.print(f"{pass_rate:.1f}% correct after try {i+1}")
|
||||
setattr(res, f"pass_rate_{i + 1}", f"{pass_rate:.1f}")
|
||||
setattr(res, f"pass_num_{i + 1}", passed_tests[i])
|
||||
|
||||
print(f"- dirname: {dirname.name}")
|
||||
style = None if res.completed_tests in NUM_TESTS else "red"
|
||||
|
@@ -521,6 +522,8 @@ def summarize_results(dirname):
|
|||
|
||||
for i in range(tries):
|
||||
print(f" pass_rate_{i + 1}: {percents[i]:.1f}")
|
||||
for i in range(tries):
|
||||
print(f" pass_num_{i + 1}: {passed_tests[i]}")
|
||||
|
||||
pct_well_formed = 1.0 - res.num_with_malformed_responses / res.completed_tests
|
||||
print(f" percent_cases_well_formed: {pct_well_formed * 100:.1f}")
|
||||
|
@@ -661,6 +664,9 @@ def run_test_real(
|
|||
|
||||
test_files = config.get("files", {}).get("test", [])
|
||||
|
||||
ignore_files = set(["Cargo.toml"])
|
||||
ignore_files.update(test_files)
|
||||
|
||||
# Copy all solution files
|
||||
for file_path in solution_files:
|
||||
src = testdir / Path(file_path)
|
||||
|
@@ -746,7 +752,7 @@ def run_test_real(
|
|||
# auto_lint=False, # disabled for code-in-json experiments
|
||||
cache_prompts=True,
|
||||
suggest_shell_commands=False,
|
||||
-ignore_mentions=set(test_files),
|
||||
+ignore_mentions=ignore_files,
|
||||
)
|
||||
dump(coder.ignore_mentions)
|
||||
|
||||
|
@@ -852,7 +858,7 @@ def run_test_real(
|
|||
|
||||
|
||||
def run_unit_tests(original_dname, testdir, history_fname, test_files):
|
||||
-timeout = 60
|
||||
+timeout = 60 * 3
|
||||
|
||||
# Map of file extensions to test commands
|
||||
TEST_COMMANDS = {
|
||||
|
|
|
@@ -201,13 +201,13 @@ def analyze_exercise_solutions(dirs=None, topn=None, copy_hard_set=False):
|
|||
lang_hard_set[lang] += 1
|
||||
|
||||
print("\nUnsolved and hard set problems by language:")
|
||||
-print(f"{'Language':<12} {'Unsolved':>8} {'Hard Set':>9} {'Total':>7} {'Percent':>8}")
|
||||
+print(f"{'Language':<12} {'Unsolved':>8} {'Hard Set':>9} {'Total':>7} {'%hardUnsolved':>8}")
|
||||
print("-" * 47)
|
||||
for lang in sorted(lang_totals.keys()):
|
||||
count = lang_unsolved[lang]
|
||||
hard = lang_hard_set[lang]
|
||||
total = lang_totals[lang]
|
||||
-pct = (count / total) * 100
|
||||
+pct = (count / hard) * 100
|
||||
print(f"{lang:<12} {count:>8} {hard:>9} {total:>7} {pct:>7.1f}%")
|
||||
print()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue