diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 6666c0a4b..d541fb991 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -42,50 +42,15 @@ def show_stats(dirnames, graphs):
 
     # return
 
-    repeats = []
     seen = dict()
     rows = []
     for row in raw_rows:
         if not row:
             continue
 
-        if row.model == "gpt-3.5-turbo":
-            row.model = "gpt-3.5-turbo-0613"
-
-        if row.model == "gpt-4":
-            row.model = "gpt-4-0613"
-
-        if row.edit_format == "diff-func-string":
-            row.edit_format = "diff-func"
-
-        if (
-            row.model == "gpt-3.5-turbo-0613"
-            and row.edit_format == "whole"
-            and "repeat" not in row.dir_name
-        ):
-            # remember this row, so we can update it with the repeat_avg
-            repeat_row = len(rows)
-
-        # gpt35 = "gpt-3.5-turbo"
-        # gpt4 = "gpt-4"
-        # if row.model.startswith(gpt35):
-        #    row.model = gpt35 + "\n" + row.model[len(gpt35) :]
-        # elif row.model.startswith(gpt4):
-        #    row.model = gpt4 + "\n" + row.model[len(gpt4) :]
-
-        if "folk" in row.dir_name:
-            row.edit_format += "folk"
-
-        # if row.model == "gpt-4-0613":
-        #    row.model += "\n(8k context window is\ntoo small for benchmark)"
-
-        if row.completed_tests < 89:
+        if row.completed_tests not in (89, 133):
             print(f"Warning: {row.dir_name} is incomplete: {row.completed_tests}")
 
-        # if "repeat" in row.dir_name:
-        #    repeats.append(vars(row))
-        #    continue
-
         kind = (row.model, row.edit_format)
         if kind in seen:
             dump(row.dir_name)
@@ -95,27 +60,7 @@ def show_stats(dirnames, graphs):
         seen[kind] = row.dir_name
         rows.append(vars(row))
 
-    if repeats:
-        dump(repeats)
-        extra = rows[repeat_row]
-        dump(extra)
-        repeats.append(extra)
-        repeats = pd.DataFrame.from_records(repeats)
-        repeat_max = repeats["pass_rate_2"].max()
-        repeat_min = repeats["pass_rate_2"].min()
-        repeat_avg = repeats["pass_rate_2"].mean()
-
-        repeat_lo = repeat_avg - repeat_min
-        repeat_hi = repeat_max - repeat_avg
-
-        dump(repeat_max)
-        dump(repeat_min)
-        dump(repeat_avg)
-
-        # use the average in the main bar
-        rows[repeat_row]["pass_rate_2"] = repeat_avg
-    else:
-        repeat_hi = repeat_lo = repeat_avg = None  # noqa: F841
+    repeat_hi = repeat_lo = repeat_avg = None  # noqa: F841
 
     df = pd.DataFrame.from_records(rows)
     # df.sort_values(by=["model", "edit_format"], inplace=True)