mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-01 02:05:00 +00:00
format output as yaml
This commit is contained in:
parent
3162d42262
commit
a7b08c7354
1 changed file with 59 additions and 35 deletions
|
@ -34,6 +34,9 @@ EXERCISES_DIR_DEFAULT = "exercism-python"
|
||||||
app = typer.Typer(add_completion=False, pretty_exceptions_enable=False)
|
app = typer.Typer(add_completion=False, pretty_exceptions_enable=False)
|
||||||
|
|
||||||
|
|
||||||
|
NUM_TESTS = (89, 133)
|
||||||
|
|
||||||
|
|
||||||
def show_stats(dirnames, graphs):
|
def show_stats(dirnames, graphs):
|
||||||
raw_rows = []
|
raw_rows = []
|
||||||
for dirname in dirnames:
|
for dirname in dirnames:
|
||||||
|
@ -48,7 +51,7 @@ def show_stats(dirnames, graphs):
|
||||||
if not row:
|
if not row:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if row.completed_tests not in (89, 133):
|
if row.completed_tests not in NUM_TESTS:
|
||||||
print(f"Warning: {row.dir_name} is incomplete: {row.completed_tests}")
|
print(f"Warning: {row.dir_name} is incomplete: {row.completed_tests}")
|
||||||
|
|
||||||
kind = (row.model, row.edit_format)
|
kind = (row.model, row.edit_format)
|
||||||
|
@ -356,7 +359,25 @@ def summarize_results(dirname):
|
||||||
console = Console(highlight=False)
|
console = Console(highlight=False)
|
||||||
console.rule(title=str(dirname))
|
console.rule(title=str(dirname))
|
||||||
|
|
||||||
console.print(f"test-cases: {res.completed_tests}")
|
commit_hashes = variants["commit_hash"]
|
||||||
|
versions = get_versions(commit_hashes)
|
||||||
|
date = dirname.name[:10]
|
||||||
|
|
||||||
|
def show(stat, red="red"):
|
||||||
|
val = getattr(res, stat)
|
||||||
|
style = red if val else None
|
||||||
|
console.print(f" {stat}: {val}", style=style)
|
||||||
|
|
||||||
|
percents = dict()
|
||||||
|
for i in range(tries):
|
||||||
|
pass_rate = 100 * passed_tests[i] / res.completed_tests
|
||||||
|
percents[i] = pass_rate
|
||||||
|
# console.print(f"{pass_rate:.1f}% correct after try {i+1}")
|
||||||
|
setattr(res, f"pass_rate_{i+1}", f"{pass_rate:.1f}")
|
||||||
|
|
||||||
|
print(f"- dirname: {dirname.name}")
|
||||||
|
style = None if res.completed_tests in NUM_TESTS else "red"
|
||||||
|
console.print(f" test_cases: {res.completed_tests}", style=style)
|
||||||
for key, val in variants.items():
|
for key, val in variants.items():
|
||||||
if len(val) > 1:
|
if len(val) > 1:
|
||||||
style = "red"
|
style = "red"
|
||||||
|
@ -364,42 +385,41 @@ def summarize_results(dirname):
|
||||||
style = None
|
style = None
|
||||||
val = ", ".join(map(str, val))
|
val = ", ".join(map(str, val))
|
||||||
setattr(res, key, val)
|
setattr(res, key, val)
|
||||||
console.print(f"{key}: {val}", style=style)
|
console.print(f" {key}: {val}", style=style)
|
||||||
|
|
||||||
def show(stat):
|
for i in range(tries):
|
||||||
val = getattr(res, stat)
|
print(f" pass_rate_{i+1}: {percents[i]:.1f}")
|
||||||
style = "red" if val else None
|
|
||||||
console.print(f"{stat}: {val}", style=style)
|
pct_well_formed = 1.0 - res.num_malformed_responses / res.completed_tests
|
||||||
|
print(f" percent_cases_well_formed: {pct_well_formed*100:.1f}")
|
||||||
|
|
||||||
console.print()
|
|
||||||
show("error_outputs")
|
show("error_outputs")
|
||||||
|
show("num_malformed_responses")
|
||||||
show("user_asks")
|
show("user_asks")
|
||||||
show("lazy_comments")
|
show("lazy_comments")
|
||||||
show("num_malformed_responses")
|
|
||||||
show("syntax_errors")
|
show("syntax_errors")
|
||||||
show("indentation_errors")
|
show("indentation_errors")
|
||||||
console.print()
|
|
||||||
show("exhausted_context_windows")
|
show("exhausted_context_windows")
|
||||||
show("test_timeouts")
|
show("test_timeouts")
|
||||||
|
|
||||||
console.print()
|
a_model = set(variants["model"]).pop()
|
||||||
percents = dict()
|
command = f"aider --model {a_model}"
|
||||||
for i in range(tries):
|
print(f" command: {command}")
|
||||||
pass_rate = 100 * passed_tests[i] / res.completed_tests
|
|
||||||
percents[i] = pass_rate
|
print(f" date: {date}")
|
||||||
console.print(f"{pass_rate:.1f}% correct after try {i}")
|
print(" versions:", ",".join(versions))
|
||||||
setattr(res, f"pass_rate_{i+1}", pass_rate)
|
|
||||||
|
|
||||||
console.print()
|
|
||||||
res.avg_duration = res.duration / res.completed_tests
|
res.avg_duration = res.duration / res.completed_tests
|
||||||
|
print(f" seconds_per_case: {res.avg_duration:.1f}")
|
||||||
|
|
||||||
console.print(f"duration: {res.avg_duration:.1f} sec/test-case")
|
print(f" total_cost: {res.cost:.4f}")
|
||||||
|
|
||||||
res.avg_cost = res.cost / res.completed_tests
|
res.avg_cost = res.cost / res.completed_tests
|
||||||
|
|
||||||
projected_cost = res.avg_cost * res.total_tests
|
projected_cost = res.avg_cost * res.total_tests
|
||||||
|
|
||||||
console.print(
|
print()
|
||||||
|
print(
|
||||||
f"costs: ${res.avg_cost:.4f}/test-case, ${res.cost:.2f} total,"
|
f"costs: ${res.avg_cost:.4f}/test-case, ${res.cost:.2f} total,"
|
||||||
f" ${projected_cost:.2f} projected"
|
f" ${projected_cost:.2f} projected"
|
||||||
)
|
)
|
||||||
|
@ -413,21 +433,8 @@ def summarize_results(dirname):
|
||||||
csv.append(f"{first:.1f}")
|
csv.append(f"{first:.1f}")
|
||||||
|
|
||||||
csv.append(" ".join(variants["edit_format"]))
|
csv.append(" ".join(variants["edit_format"]))
|
||||||
model = variants["model"].pop()
|
csv.append(command)
|
||||||
csv.append(f"aider --model {model}")
|
|
||||||
versions = set()
|
|
||||||
for hsh in variants["commit_hash"]:
|
|
||||||
if not hsh:
|
|
||||||
continue
|
|
||||||
hsh = hsh.split("-")[0]
|
|
||||||
try:
|
|
||||||
version = subprocess.check_output(
|
|
||||||
["git", "show", f"{hsh}:aider/__init__.py"], universal_newlines=True
|
|
||||||
)
|
|
||||||
version = re.search(r'__version__ = "(.*)"', version).group(1)
|
|
||||||
versions.add(version)
|
|
||||||
except subprocess.CalledProcessError:
|
|
||||||
pass
|
|
||||||
csv.append(" ".join(sorted(versions)))
|
csv.append(" ".join(sorted(versions)))
|
||||||
commit_hashes = variants.get("commit_hash", [])
|
commit_hashes = variants.get("commit_hash", [])
|
||||||
if all(commit_hashes):
|
if all(commit_hashes):
|
||||||
|
@ -445,6 +452,23 @@ def summarize_results(dirname):
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def get_versions(commit_hashes):
    """Return the set of aider version strings found at the given commits.

    For every non-empty entry in ``commit_hashes`` only the text before the
    first ``-`` is used as the git revision. ``aider/__init__.py`` is read
    at that revision with ``git show`` and the value of its ``__version__``
    assignment is collected.

    The lookup is best-effort: a revision that git cannot show, a missing
    ``git`` executable, or a file without a ``__version__`` line simply
    contributes nothing to the result instead of raising.

    Args:
        commit_hashes: iterable of commit hash strings; falsy entries
            (``None``, ``""``) are ignored.

    Returns:
        A ``set`` of version strings (possibly empty).
    """
    versions = set()
    for hsh in commit_hashes:
        if not hsh:
            continue

        # Drop anything after the first "-" so only the bare revision remains.
        hsh = hsh.split("-")[0]
        if not hsh:
            # An empty revision would turn the argument into ":path", which
            # git resolves against the index rather than a commit.
            continue

        try:
            init_source = subprocess.check_output(
                ["git", "show", f"{hsh}:aider/__init__.py"], universal_newlines=True
            )
        except (subprocess.CalledProcessError, OSError):
            # Unknown revision/path, or git itself could not be launched.
            continue

        match = re.search(r'__version__ = "(.*)"', init_source)
        if match:
            versions.add(match.group(1))

    return versions
|
||||||
|
|
||||||
|
|
||||||
def get_replayed_content(replay_dname, test_dname):
|
def get_replayed_content(replay_dname, test_dname):
|
||||||
replay_dname = Path(replay_dname)
|
replay_dname = Path(replay_dname)
|
||||||
test_dname = Path(test_dname)
|
test_dname = Path(test_dname)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue