From 207bbbc4df4f2e6f18ddbc31159e0ed63f57ac9c Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Wed, 8 Nov 2023 10:07:29 -0800 Subject: [PATCH] aider: Implemented the `plot_timing` function to create a bar chart showing the average duration of each (model, edit_format) combination, with consistent styling as `plot_outcomes`. # Aider chat conversation: ASSISTANT: To implement the `plot_timing` function and style the graph similarly to `plot_outcomes`, we will follow these steps: 1. Define the `plot_timing` function to create a bar chart that shows the average duration for each combination of model and edit format. 2. Use similar styling options as used in `plot_outcomes` for consistency. Let's start by implementing the `plot_timing` function in the `benchmark/benchmark.py` file. python benchmark/benchmark.py <<<<<<< SEARCH def plot_timing(df): """plot a graph showing the average duration of each (model, edit_format)""" ======= def plot_timing(df): """plot a graph showing the average duration of each (model, edit_format)""" plt.rcParams["hatch.linewidth"] = 0.5 plt.rcParams["hatch.color"] = "#444444" from matplotlib import rc rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"], "size": 10}) fig, ax = plt.subplots(figsize=(6, 4)) ax.grid(axis="y", zorder=0, lw=0.2) zorder = 1 grouped = df.groupby(["model", "edit_format"])["avg_duration"].mean().unstack() num_models, num_formats = grouped.shape pos = np.array(range(num_models)) width = 0.8 / num_formats formats = grouped.columns models = grouped.index for i, fmt in enumerate(formats): edge = dict(edgecolor="#ffffff", linewidth=1.5) color = "#b3e6a8" if "diff" in fmt else "#b3d1e6" hatch = "////" if "func" in fmt else "" rects = ax.bar( pos + i * width, grouped[fmt], width * 0.95, color=color, hatch=hatch, zorder=zorder + 1, **edge, ) ax.bar_label(rects, padding=4, labels=[f"{v:.1f}s" for v in grouped[fmt]], size=6) ax.set_xticks([p + 0.5 * width for p in pos]) ax.set_xticklabels(models) ax.set_ylabel("Average duration (seconds)") ax.set_title("GPT Code Editing - Timing") ax.legend( title="Edit Format", loc="upper left", ) ax.set_ylim(top=max(grouped.max()) * 1.1) # Set y-axis limit to 10% more than the max value plt.tight_layout() plt.savefig("tmp_timing.svg") imgcat(fig) >>>>>>> REPLACE This code block adds the implementation of the `plot_timing` function, which creates a bar chart to visualize the average duration of tests for each model and edit format. The styling is kept consistent with the `plot_outcomes` function, using similar colors, hatches, and font settings. The resulting graph is saved as an SVG file and displayed in the terminal using `imgcat`. --- benchmark/benchmark.py | 48 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index ddd3b0977..d23065415 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -127,6 +127,54 @@ def show_stats(dirnames): def plot_timing(df): """plot a graph showing the average duration of each (model, edit_format)""" + plt.rcParams["hatch.linewidth"] = 0.5 + plt.rcParams["hatch.color"] = "#444444" + + from matplotlib import rc + rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"], "size": 10}) + + fig, ax = plt.subplots(figsize=(6, 4)) + ax.grid(axis="y", zorder=0, lw=0.2) + + zorder = 1 + grouped = df.groupby(["model", "edit_format"])["avg_duration"].mean().unstack() + num_models, num_formats = grouped.shape + + pos = np.array(range(num_models)) + width = 0.8 / num_formats + + formats = grouped.columns + models = grouped.index + + for i, fmt in enumerate(formats): + edge = dict(edgecolor="#ffffff", linewidth=1.5) + color = "#b3e6a8" if "diff" in fmt else "#b3d1e6" + hatch = "////" if "func" in fmt else "" + rects = ax.bar( + pos + i * width, + grouped[fmt], + width * 0.95, + color=color, + hatch=hatch, + zorder=zorder + 1, + **edge, + ) + ax.bar_label(rects, padding=4, labels=[f"{v:.1f}s" for v in grouped[fmt]], size=6) + + ax.set_xticks([p + 0.5 * width for p in pos]) + ax.set_xticklabels(models) + + ax.set_ylabel("Average duration (seconds)") + ax.set_title("GPT Code Editing - Timing") + ax.legend( + title="Edit Format", + loc="upper left", + ) + ax.set_ylim(top=max(grouped.max()) * 1.1) # Set y-axis limit to 10% more than the max value + + plt.tight_layout() + plt.savefig("tmp_timing.svg") + imgcat(fig) def plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg):