diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 28910f5c9..020026f17 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -121,8 +121,9 @@ def show_stats(dirnames): df.sort_values(by=["model", "edit_format"], inplace=True) # dump(df) - plot_timing(df) - plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg) + if graphs: + plot_timing(df) + plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg) def plot_timing(df): @@ -309,6 +310,7 @@ def resolve_dirname(dirname, use_single_prior, make_new): @app.command() def main( dirnames: List[str] = typer.Argument(..., help="Directory names"), + graphs: bool = typer.Option(False, "--graphs", help="Generate graphs"), model: str = typer.Option("gpt-3.5-turbo", "--model", "-m", help="Model name"), edit_format: str = typer.Option(None, "--edit-format", "-e", help="Edit format"), keywords: str = typer.Option(