From 0c3d4198f5c4f15ef09b6b2d4f09bdeabd53e9d8 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 25 Jun 2023 13:18:43 -0700 Subject: [PATCH] Show variants --- scripts/benchmark.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/scripts/benchmark.py b/scripts/benchmark.py index b3ce8f4bb..ae81140ee 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -6,6 +6,7 @@ import random import shutil import subprocess import time +from collections import defaultdict from json.decoder import JSONDecodeError from pathlib import Path @@ -152,6 +153,8 @@ def summarize_results(all_results, total_tests=None): duration = 0 total_cost = 0 + variants = defaultdict(set) + for results in all_results: if not results: continue @@ -165,13 +168,23 @@ def summarize_results(all_results, total_tests=None): total_cost += results["cost"] duration += results["duration"] + for key in "model edit_format".split(): + if key in results: + variants[key].add(results[key]) + console.rule() console.print(f"{completed_tests} test-cases") + for key, val in variants.items(): + val = ", ".join(val) + console.print(f"{key}: {val}") + + console.print() for i in range(retries): pass_rate = 100 * passed_tests[i] / completed_tests console.print(f"{pass_rate:.1f}% correct after try {i}") + console.print() avg_duration = duration / completed_tests console.print(f"{avg_duration:.1f} sec/test-case")