mirror of https://github.com/Aider-AI/aider.git (synced 2025-06-01 10:14:59 +00:00)
handle tasks with exceptions in the stats output
commit 3bb237bdc1
parent a0649ba5fa

3 changed files with 18 additions and 11 deletions
@@ -3,4 +3,4 @@ gpt-4-turbo-2024-04-09,0,34.1,udiff,aider --gpt-4-turbo,0.27.1-dev,b75fdb9,4/9/2
 gpt-4-0125-preview,0,43.8,udiff,aider --model gpt-4-0125-preview,0.22.1-dev,0fbd702,1/25/24
 gpt-4-1106-preview,0,57.3,udiff,aider --model gpt-4-1106-preview,0.22.1-dev,a75e7c8,1/25/24
 claude-3-opus-20240229,0,67.4,diff,aider --opus,0.31.2-dev,b02320b-dirty,5/4/24
-gemini/gemini-1.5-pro-latest,0.0,50.6,diff-fenced,aider --model gemini/gemini-1.5-pro-latest,0.31.2-dev,3e4fca2-dirty 1b35ca2-dirty 425cb29,5/4/24
+gemini/gemini-1.5-pro-latest,0.0,49.4,diff-fenced,aider --model gemini/gemini-1.5-pro-latest,0.31.2-dev,425cb29 1b35ca2-dirty a0649ba-dirty 3e4fca2-dirty,2024-05-04
@@ -759,12 +759,13 @@ def load_results(dirname):
 
 def summarize_results(dirname):
     all_results = load_results(dirname)
+    dump(len(all_results))
 
     res = SimpleNamespace()
     res.total_tests = len(list(Path(dirname).glob("*")))
 
     try:
-        tries = max(len(results["tests_outcomes"]) for results in all_results if results)
+        tries = max(len(results.get("tests_outcomes", [])) for results in all_results if results)
     except ValueError:
         tries = 0
 
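A note on the try/except around the max() call: when every entry in all_results is falsy (for example, no task left a usable results file), the generator is empty and max() raises ValueError, hence the tries = 0 fallback. Switching to results.get("tests_outcomes", []) additionally keeps a crashed task's exception-only dict from raising KeyError inside the expression. A minimal sketch with made-up result dicts:

```python
# Made-up result dicts, not real benchmark output.
all_results = [
    {"exception": "RuntimeError: API timed out"},  # crashed task: no tests_outcomes key
    None,                                          # missing results file
]

# results["tests_outcomes"] would raise KeyError for the crashed task;
# .get() with a [] default lets it contribute a length of 0 instead.
try:
    tries = max(len(r.get("tests_outcomes", [])) for r in all_results if r)
except ValueError:
    # reached only when every entry is falsy, leaving max() an empty sequence
    tries = 0

print(tries)  # 0
```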
@@ -791,13 +792,14 @@ def summarize_results(dirname):
             continue
 
         res.completed_tests += 1
-        passed = results["tests_outcomes"][-1]
+        tests_outcomes = results.get("tests_outcomes", [])
+        passed = tests_outcomes and tests_outcomes[-1]
         if passed:
-            for i in range(len(results["tests_outcomes"]) - 1, tries):
+            for i in range(len(tests_outcomes) - 1, tries):
                 passed_tests[i] += 1
 
-        res.cost += results["cost"]
-        res.duration += results["duration"]
+        res.cost += results.get("cost", 0)
+        res.duration += results.get("duration", 0)
         res.test_timeouts += results.get("test_timeouts", 0)
 
         res.error_outputs += results.get("num_error_outputs", 0)
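The same defensive pattern covers the per-result totals: a results file written for a crashed task may hold nothing but an exception message, so each lookup falls back to a neutral default and that task simply contributes zero. A rough sketch of how such a dict flows through the accumulation (field names follow the diff; the values are invented):

```python
from types import SimpleNamespace

# Invented examples: one normal result, one from a task that raised.
normal = {"tests_outcomes": [False, True], "cost": 0.12, "duration": 31.5}
crashed = {"exception": "ValueError: bad response"}

res = SimpleNamespace(completed_tests=0, cost=0, duration=0, test_timeouts=0)
passes = 0

for results in (normal, crashed):
    res.completed_tests += 1
    tests_outcomes = results.get("tests_outcomes", [])
    passed = tests_outcomes and tests_outcomes[-1]  # falsy when the list is empty
    if passed:
        passes += 1

    res.cost += results.get("cost", 0)
    res.duration += results.get("duration", 0)
    res.test_timeouts += results.get("test_timeouts", 0)

print(passes, res.cost, res.duration)  # 1 0.12 31.5 -- the crashed task adds nothing
```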
@@ -811,6 +813,7 @@ def summarize_results(dirname):
 
         for key in "model edit_format commit_hash".split():
             val = results.get(key)
             if val:
                 variants[key].add(val)
 
     if not res.completed_tests:
@@ -903,7 +906,7 @@ def summarize_results(dirname):
     csv.append(dirname.name[:10])
     csv = ",".join(csv)
     print()
-    print("Add this to _data/leaderboard.csv:")
+    print("Add this to the files in _data:")
     print(csv)
     console.rule()
 
@@ -928,15 +931,19 @@ def get_replayed_content(replay_dname, test_dname):
     return "".join(res)
 
 
-def run_test(*args, **kwargs):
+def run_test(original_dname, testdir, *args, **kwargs):
     try:
-        return run_test_real(*args, **kwargs)
+        return run_test_real(original_dname, testdir, *args, **kwargs)
     except Exception as err:
         print("=" * 40)
         print("Test failed")
         print(err)
         traceback.print_exc()
+
+        testdir = Path(testdir)
+        results_fname = testdir / ".aider.results.json"
+        results_fname.write_text(json.dumps(dict(exception=str(err))))
 
 
 def run_test_real(
     original_dname,
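The wrapper change means a crashing task still leaves an .aider.results.json behind, so summarize_results sees an exception-only dict instead of silently skipping the directory. A hedged sketch of that round trip, with throwaway names (run_task, fail_task, example-testdir) standing in for the real harness:

```python
import json
import traceback
from pathlib import Path


def fail_task():
    raise RuntimeError("model never returned an edit")


def run_task(testdir, task):
    # Loosely mirrors the run_test wrapper: on any exception, record it in
    # the same .aider.results.json file a successful run would produce.
    try:
        return task()
    except Exception as err:
        traceback.print_exc()
        results_fname = Path(testdir) / ".aider.results.json"
        results_fname.write_text(json.dumps(dict(exception=str(err))))


testdir = Path("example-testdir")  # hypothetical scratch directory
testdir.mkdir(exist_ok=True)
run_task(testdir, fail_task)
print((testdir / ".aider.results.json").read_text())
# {"exception": "model never returned an edit"}
```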
@@ -175,4 +175,4 @@ See the
 [benchmark README](https://github.com/paul-gauthier/aider/blob/main/benchmark/README.md)
 for information on running aider's code editing benchmark.
 Submit results by opening a PR with edits to the
-[benchmark results CSV data files](https://github.com/paul-gauthier/aider/blob/main/_data/).
+[benchmark results data files](https://github.com/paul-gauthier/aider/blob/main/_data/).