From 6980cfd1156b8b39274d35b0994539cc59c05c75 Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Tue, 13 Aug 2024 17:31:27 -0700
Subject: [PATCH 01/34] fix: Handle exceptions in the send method of
base_coder.py
---
aider/coders/base_coder.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index da386a197..3fdc12260 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -1244,6 +1244,7 @@ class Coder:
self.io.log_llm_history("TO LLM", format_messages(messages))
+ completion = None
try:
hash_object, completion = send_completion(
model.name,
@@ -1263,6 +1264,8 @@ class Coder:
except KeyboardInterrupt as kbi:
self.keyboard_interrupt()
raise kbi
+ except Exception as e:
+ self.io.tool_error(f"Error during API call: {str(e)}")
finally:
self.io.log_llm_history(
"LLM RESPONSE",
From 139f7992cbe89312a548198524870ed68713ff1c Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Tue, 13 Aug 2024 17:43:41 -0700
Subject: [PATCH 02/34] do not pass pretty to coder
---
benchmark/benchmark.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 6261e00f9..d3c61dedd 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -378,7 +378,7 @@ def summarize_results(dirname):
pass_rate = 100 * passed_tests[i] / res.completed_tests
percents[i] = pass_rate
# console.print(f"{pass_rate:.1f}% correct after try {i+1}")
- setattr(res, f"pass_rate_{i+1}", f"{pass_rate:.1f}")
+ setattr(res, f"pass_rate_{i + 1}", f"{pass_rate:.1f}")
print(f"- dirname: {dirname.name}")
style = None if res.completed_tests in NUM_TESTS else "red"
@@ -393,10 +393,10 @@ def summarize_results(dirname):
console.print(f" {key}: {val}", style=style)
for i in range(tries):
- print(f" pass_rate_{i+1}: {percents[i]:.1f}")
+ print(f" pass_rate_{i + 1}: {percents[i]:.1f}")
pct_well_formed = 1.0 - res.num_with_malformed_responses / res.completed_tests
- print(f" percent_cases_well_formed: {pct_well_formed*100:.1f}")
+ print(f" percent_cases_well_formed: {pct_well_formed * 100:.1f}")
show("error_outputs")
show("num_malformed_responses")
@@ -564,7 +564,6 @@ def run_test_real(
fnames=fnames,
use_git=False,
stream=False,
- pretty=False,
verbose=verbose,
)
coder.max_apply_update_errors = max_apply_update_errors
From 277c8b32a71fec16c312ea3522769b5f0a05efd3 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Tue, 13 Aug 2024 17:52:54 -0700
Subject: [PATCH 03/34] copy
---
aider/website/docs/usage/images-urls.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/aider/website/docs/usage/images-urls.md b/aider/website/docs/usage/images-urls.md
index 2cecfdef6..b3fb6797a 100644
--- a/aider/website/docs/usage/images-urls.md
+++ b/aider/website/docs/usage/images-urls.md
@@ -23,7 +23,7 @@ You can add images to the chat just like you would
add any other file:
- Use `/add <image-filename>` from within the chat
-- Use `/add-clipboard-image` to paste an image from your clipboard into the chat.
+- Use `/clipboard` to paste an image from your clipboard into the chat.
- Launch aider with image filenames on the command line: `aider <image-filename>` along with any other command line arguments you need.
## Web pages
From 060c8ff89ad6677bf69b6eb5b53f9a3e3ab94c94 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Tue, 13 Aug 2024 18:06:00 -0700
Subject: [PATCH 04/34] override dotenv
---
benchmark/benchmark.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index d3c61dedd..61f9bf1c4 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -28,8 +28,6 @@ from aider.coders import Coder
from aider.dump import dump # noqa: F401
from aider.io import InputOutput
-load_dotenv()
-
BENCHMARK_DNAME = Path(os.environ.get("AIDER_BENCHMARK_DIR", "tmp.benchmarks"))
EXERCISES_DIR_DEFAULT = "exercism-python"
@@ -39,6 +37,8 @@ app = typer.Typer(add_completion=False, pretty_exceptions_enable=False)
NUM_TESTS = (89, 133)
+load_dotenv(override=True)
+
def show_stats(dirnames, graphs):
raw_rows = []
@@ -590,7 +590,7 @@ def run_test_real(
coder.apply_updates()
else:
- response = coder.run(with_message=instructions)
+ response = coder.run(with_message=instructions, preproc=False)
dur += time.time() - start
if not no_aider:
From 37512a532a9d96d2cb9d3293616149b342d8673b Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Tue, 13 Aug 2024 18:44:00 -0700
Subject: [PATCH 05/34] copy
---
HISTORY.md | 4 ++++
aider/website/HISTORY.md | 4 ++++
2 files changed, 8 insertions(+)
diff --git a/HISTORY.md b/HISTORY.md
index 2807e037d..6a520a8c2 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,6 +1,10 @@
# Release history
+### Aider v0.50.1
+
+- Bugfix for provider API exceptions.
+
### Aider v0.50.0
- Infinite output for DeepSeek Coder, Mistral models in addition to Anthropic's models.
diff --git a/aider/website/HISTORY.md b/aider/website/HISTORY.md
index d70bde900..96420b06a 100644
--- a/aider/website/HISTORY.md
+++ b/aider/website/HISTORY.md
@@ -16,6 +16,10 @@ cog.out(text)
# Release history
+### Aider v0.50.1
+
+- Bugfix for provider API exceptions.
+
### Aider v0.50.0
- Infinite output for DeepSeek Coder, Mistral models in addition to Anthropic's models.
From 044687cd99baaa77d7fab035264f8991f24dba64 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Tue, 13 Aug 2024 18:47:55 -0700
Subject: [PATCH 06/34] version bump to 0.50.1
---
aider/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/aider/__init__.py b/aider/__init__.py
index 41d36926a..834178665 100644
--- a/aider/__init__.py
+++ b/aider/__init__.py
@@ -1 +1 @@
-__version__ = "0.50.1-dev"
+__version__ = "0.50.1"
From c84759d875e319dcfe9c84b89fe1e4a55c79319e Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Tue, 13 Aug 2024 18:49:54 -0700
Subject: [PATCH 07/34] set version to 0.50.2-dev
---
aider/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/aider/__init__.py b/aider/__init__.py
index 834178665..a7d6d6205 100644
--- a/aider/__init__.py
+++ b/aider/__init__.py
@@ -1 +1 @@
-__version__ = "0.50.1"
+__version__ = "0.50.2-dev"
From b1c376981380f38faa9f308c5e6c072492c27476 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Tue, 13 Aug 2024 18:51:33 -0700
Subject: [PATCH 08/34] editblock prompt improvement to better edit code in
json wrapped formats like ipynb
Confirmed improvements on basic edits to a .ipynb file.
Confirmed no regressions against latest deepseek coder, sonnet, gpt-4o.
---
aider/coders/editblock_prompts.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/aider/coders/editblock_prompts.py b/aider/coders/editblock_prompts.py
index af3e4f3b7..7a2acdae9 100644
--- a/aider/coders/editblock_prompts.py
+++ b/aider/coders/editblock_prompts.py
@@ -125,8 +125,8 @@ Every *SEARCH/REPLACE block* must use this format:
7. The end of the replace block: >>>>>>> REPLACE
8. The closing fence: {fence[1]}
-Every *SEARCH* section must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
-
+Every *SEARCH* section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, etc.
+If the file contains code or other data wrapped/escaped in json/xml/quotes or other containers, you need to propose edits to the literal contents of the file, including the container markup.
*SEARCH/REPLACE* blocks will replace *all* matching occurrences.
Include enough lines to make the SEARCH blocks uniquely match the lines to change.
From bcedaebe57cefd192317cf620c577a22e9e66c06 Mon Sep 17 00:00:00 2001
From: pcamp
Date: Wed, 14 Aug 2024 04:22:04 -0500
Subject: [PATCH 09/34] Fix typo in config.md
---
aider/website/docs/config.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/aider/website/docs/config.md b/aider/website/docs/config.md
index 40087c936..3b192c432 100644
--- a/aider/website/docs/config.md
+++ b/aider/website/docs/config.md
@@ -28,7 +28,7 @@ Using a `.aider.conf.yml` file:
dark-mode: true
```
-By setting an environgment variable:
+By setting an environment variable:
```
export AIDER_DARK_MODE=true
From 454408f9d58b7f4260fea9c94062792081c6ca3c Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Wed, 14 Aug 2024 06:11:45 -0700
Subject: [PATCH 10/34] Added chatgpt-4o-latest
---
aider/website/_data/edit_leaderboard.yml | 23 +++++++++++++++++++++++
aider/website/docs/leaderboards/index.md | 2 +-
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/aider/website/_data/edit_leaderboard.yml b/aider/website/_data/edit_leaderboard.yml
index 52f667849..72dd9c96b 100644
--- a/aider/website/_data/edit_leaderboard.yml
+++ b/aider/website/_data/edit_leaderboard.yml
@@ -843,4 +843,27 @@
versions: 0.48.1-dev
seconds_per_case: 6.5
total_cost: 0.0000
+
+- dirname: 2024-08-14-13-07-12--chatgpt-4o-latest-diff
+ test_cases: 133
+ model: chatgpt-4o-latest
+ edit_format: diff
+ commit_hash: b1c3769
+ pass_rate_1: 53.4
+ pass_rate_2: 69.2
+ percent_cases_well_formed: 97.7
+ error_outputs: 27
+ num_malformed_responses: 5
+ num_with_malformed_responses: 3
+ user_asks: 7
+ lazy_comments: 0
+ syntax_errors: 0
+ indentation_errors: 0
+ exhausted_context_windows: 0
+ test_timeouts: 0
+ command: aider --model openai/chatgpt-4o-latest
+ date: 2024-08-14
+ versions: 0.50.2-dev
+ seconds_per_case: 26.3
+ total_cost: 3.6113
\ No newline at end of file
diff --git a/aider/website/docs/leaderboards/index.md b/aider/website/docs/leaderboards/index.md
index 3efa40e8b..5e4255bdc 100644
--- a/aider/website/docs/leaderboards/index.md
+++ b/aider/website/docs/leaderboards/index.md
@@ -321,6 +321,6 @@ mod_dates = [get_last_modified_date(file) for file in files]
latest_mod_date = max(mod_dates)
cog.out(f"{latest_mod_date.strftime('%B %d, %Y.')}")
]]]-->
-August 10, 2024.
+August 14, 2024.
From 56975d02a16202502b17fc2257295bef5719279a Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Wed, 14 Aug 2024 06:20:31 -0700
Subject: [PATCH 11/34] fix: Update path to edit_leaderboard.yml file
---
benchmark/over_time.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 0ea641d64..1c20dc336 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -2,7 +2,7 @@ import matplotlib.pyplot as plt
import yaml
from imgcat import imgcat
from matplotlib import rc
-
+from aider.dump import dump # noqa: 401
def plot_over_time(yaml_file):
with open(yaml_file, "r") as file:
@@ -57,4 +57,4 @@ def plot_over_time(yaml_file):
# Example usage
-plot_over_time("_data/edit_leaderboard.yml")
+plot_over_time("aider/website/_data/edit_leaderboard.yml")
From d2479f30f76f178bed794db9cc96b064206878a2 Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:20:32 -0700
Subject: [PATCH 12/34] fix: Add debug prints and check for empty data in
over_time.py
---
benchmark/over_time.py | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 1c20dc336..6143c0ece 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -12,11 +12,23 @@ def plot_over_time(yaml_file):
pass_rates = []
models = []
+ print("Debug: Raw data from YAML file:")
+ print(data)
+
for entry in data:
if "released" in entry and "pass_rate_2" in entry:
dates.append(entry["released"])
pass_rates.append(entry["pass_rate_2"])
models.append(entry["model"].split("(")[0].strip())
+
+ print("Debug: Processed data:")
+ print("Dates:", dates)
+ print("Pass rates:", pass_rates)
+ print("Models:", models)
+
+ if not dates or not pass_rates:
+ print("Error: No data to plot. Check if the YAML file is empty or if the data is in the expected format.")
+ return
plt.rcParams["hatch.linewidth"] = 0.5
plt.rcParams["hatch.color"] = "#444444"
From d94d5aa3fa88dcf56a350d5c0a8cb041fda9508b Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:20:36 -0700
Subject: [PATCH 13/34] style: format code according to linter rules
---
benchmark/over_time.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 6143c0ece..f073ad9eb 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -2,7 +2,9 @@ import matplotlib.pyplot as plt
import yaml
from imgcat import imgcat
from matplotlib import rc
-from aider.dump import dump # noqa: 401
+
+from aider.dump import dump # noqa: 401
+
def plot_over_time(yaml_file):
with open(yaml_file, "r") as file:
@@ -20,14 +22,17 @@ def plot_over_time(yaml_file):
dates.append(entry["released"])
pass_rates.append(entry["pass_rate_2"])
models.append(entry["model"].split("(")[0].strip())
-
+
print("Debug: Processed data:")
print("Dates:", dates)
print("Pass rates:", pass_rates)
print("Models:", models)
if not dates or not pass_rates:
- print("Error: No data to plot. Check if the YAML file is empty or if the data is in the expected format.")
+ print(
+ "Error: No data to plot. Check if the YAML file is empty or if the data is in the"
+ " expected format."
+ )
return
plt.rcParams["hatch.linewidth"] = 0.5
@@ -58,7 +63,9 @@ def plot_over_time(yaml_file):
)
ax.set_xlabel("Model release date", fontsize=18, color="#555")
- ax.set_ylabel("Aider code editing benchmark,\npercent completed correctly", fontsize=18, color="#555")
+ ax.set_ylabel(
+ "Aider code editing benchmark,\npercent completed correctly", fontsize=18, color="#555"
+ )
ax.set_title("LLM code editing skill by model release date", fontsize=20)
ax.set_ylim(0, 30)
plt.xticks(fontsize=14)
From d8c8c51156d89bff3cc4d4be69ef09f40655960b Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:21:45 -0700
Subject: [PATCH 14/34] The commit message for these changes would be:
feat: Improve graph visualization and add debugging
The changes made in this commit include:
1. Adjusting the y-axis limit to 100 to accommodate the higher pass rate values.
2. Rotating the x-axis labels for better readability.
3. Adding debug print statements to track the progress of figure generation and display.
4. Increasing the figure size for better visibility.
5. Adding additional debugging to ensure the data is being plotted correctly.
These improvements should help with the visualization and debugging of the graph generation process.
---
benchmark/over_time.py | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index f073ad9eb..2e04c7881 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -41,7 +41,9 @@ def plot_over_time(yaml_file):
rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"], "size": 10})
plt.rcParams["text.color"] = "#444444"
- fig, ax = plt.subplots(figsize=(10, 5))
+ fig, ax = plt.subplots(figsize=(12, 6)) # Increase figure size for better visibility
+
+ print("Debug: Figure created. Plotting data...")
ax.grid(axis="y", zorder=0, lw=0.2)
for spine in ax.spines.values():
spine.set_edgecolor("#DDDDDD")
@@ -67,12 +69,18 @@ def plot_over_time(yaml_file):
"Aider code editing benchmark,\npercent completed correctly", fontsize=18, color="#555"
)
ax.set_title("LLM code editing skill by model release date", fontsize=20)
- ax.set_ylim(0, 30)
- plt.xticks(fontsize=14)
+ ax.set_ylim(0, 100) # Adjust y-axis limit to accommodate higher values
+ plt.xticks(fontsize=14, rotation=45, ha='right') # Rotate x-axis labels for better readability
plt.tight_layout(pad=3.0)
+
+ print("Debug: Saving figures...")
plt.savefig("tmp_over_time.png")
plt.savefig("tmp_over_time.svg")
+
+ print("Debug: Displaying figure with imgcat...")
imgcat(fig)
+
+ print("Debug: Figure generation complete.")
# Example usage
From 1f59687e9ddf102d755a2149812121c1d9c59864 Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:21:48 -0700
Subject: [PATCH 15/34] style: Format code with linter
---
benchmark/over_time.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 2e04c7881..884dfefc3 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -42,7 +42,7 @@ def plot_over_time(yaml_file):
plt.rcParams["text.color"] = "#444444"
fig, ax = plt.subplots(figsize=(12, 6)) # Increase figure size for better visibility
-
+
print("Debug: Figure created. Plotting data...")
ax.grid(axis="y", zorder=0, lw=0.2)
for spine in ax.spines.values():
@@ -70,16 +70,16 @@ def plot_over_time(yaml_file):
)
ax.set_title("LLM code editing skill by model release date", fontsize=20)
ax.set_ylim(0, 100) # Adjust y-axis limit to accommodate higher values
- plt.xticks(fontsize=14, rotation=45, ha='right') # Rotate x-axis labels for better readability
+ plt.xticks(fontsize=14, rotation=45, ha="right") # Rotate x-axis labels for better readability
plt.tight_layout(pad=3.0)
-
+
print("Debug: Saving figures...")
plt.savefig("tmp_over_time.png")
plt.savefig("tmp_over_time.svg")
-
+
print("Debug: Displaying figure with imgcat...")
imgcat(fig)
-
+
print("Debug: Figure generation complete.")
From c4f70d81b72853c7597086dc2611f15cbd8fe2cd Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:22:48 -0700
Subject: [PATCH 16/34] feat: add new color for all "-4o-" models except
"gpt-4o-mini"
---
benchmark/over_time.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 884dfefc3..f04da3730 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -50,7 +50,10 @@ def plot_over_time(yaml_file):
spine.set_linewidth(0.5)
colors = [
- "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "blue" for model in models
+ "orange" if "-4o-" in model and "gpt-4o-mini" not in model
+ else "red" if "gpt-4" in model
+ else "green" if "gpt-3.5" in model
+ else "blue" for model in models
]
ax.scatter(dates, pass_rates, c=colors, alpha=0.5, s=120)
From 1f6cadcc66172d21e844eca2930192dfb7ba2eef Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:22:51 -0700
Subject: [PATCH 17/34] style: Refactor conditional logic in color assignment
---
benchmark/over_time.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index f04da3730..fc0e13f9c 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -50,10 +50,12 @@ def plot_over_time(yaml_file):
spine.set_linewidth(0.5)
colors = [
- "orange" if "-4o-" in model and "gpt-4o-mini" not in model
- else "red" if "gpt-4" in model
- else "green" if "gpt-3.5" in model
- else "blue" for model in models
+ (
+ "orange"
+ if "-4o-" in model and "gpt-4o-mini" not in model
+ else "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "blue"
+ )
+ for model in models
]
ax.scatter(dates, pass_rates, c=colors, alpha=0.5, s=120)
From 714fd45f4d3efe1206ea9aa6002f5a08aa97b05b Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Wed, 14 Aug 2024 06:27:47 -0700
Subject: [PATCH 18/34] fix: Update color logic and font size in over_time.py
---
benchmark/over_time.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index fc0e13f9c..2c2116628 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -52,7 +52,7 @@ def plot_over_time(yaml_file):
colors = [
(
"orange"
- if "-4o-" in model and "gpt-4o-mini" not in model
+ if "-4o" in model and "gpt-4o-mini" not in model
else "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "blue"
)
for model in models
@@ -63,7 +63,7 @@ def plot_over_time(yaml_file):
ax.annotate(
model,
(dates[i], pass_rates[i]),
- fontsize=12,
+ fontsize=8,
alpha=0.75,
xytext=(5, 5),
textcoords="offset points",
From 1cdbc769746a838d12f9a9b344dffc5e1868d671 Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:27:48 -0700
Subject: [PATCH 19/34] feat: Connect model family lines in over_time plot
---
benchmark/over_time.py | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 2c2116628..6215c5411 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -57,6 +57,25 @@ def plot_over_time(yaml_file):
)
for model in models
]
+
+ # Separate data points by color
+ orange_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "orange"]
+ red_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "red"]
+ green_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "green"]
+ blue_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "blue"]
+
+ # Plot lines for orange, red, and green points
+ if orange_points:
+ orange_dates, orange_rates = zip(*sorted(orange_points))
+ ax.plot(orange_dates, orange_rates, c="orange", alpha=0.5, linewidth=1)
+ if red_points:
+ red_dates, red_rates = zip(*sorted(red_points))
+ ax.plot(red_dates, red_rates, c="red", alpha=0.5, linewidth=1)
+ if green_points:
+ green_dates, green_rates = zip(*sorted(green_points))
+ ax.plot(green_dates, green_rates, c="green", alpha=0.5, linewidth=1)
+
+ # Plot all points
ax.scatter(dates, pass_rates, c=colors, alpha=0.5, s=120)
for i, model in enumerate(models):
From a7290be843c6a6af2649556b1a81b02bf5a90b3c Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:27:51 -0700
Subject: [PATCH 20/34] style: Apply linter formatting changes
---
benchmark/over_time.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 6215c5411..8eec5d603 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -57,7 +57,7 @@ def plot_over_time(yaml_file):
)
for model in models
]
-
+
# Separate data points by color
orange_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "orange"]
red_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "red"]
From fb0b348bec51dc093c4ea1472cca3f8020a5ca34 Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:28:28 -0700
Subject: [PATCH 21/34] fix: Remove unused `blue_points` variable
---
benchmark/over_time.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 8eec5d603..2204eae45 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -62,7 +62,6 @@ def plot_over_time(yaml_file):
orange_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "orange"]
red_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "red"]
green_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "green"]
- blue_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "blue"]
# Plot lines for orange, red, and green points
if orange_points:
From d2b4846b956b8f12d351964c861418b20848d389 Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:29:13 -0700
Subject: [PATCH 22/34] feat: Replace orange color with purple for "-4o" models
---
benchmark/over_time.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 2204eae45..12e5dde5a 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -51,7 +51,7 @@ def plot_over_time(yaml_file):
colors = [
(
- "orange"
+ "purple"
if "-4o" in model and "gpt-4o-mini" not in model
else "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "blue"
)
@@ -59,14 +59,14 @@ def plot_over_time(yaml_file):
]
# Separate data points by color
- orange_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "orange"]
+ purple_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "purple"]
red_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "red"]
green_points = [(d, r) for d, r, c in zip(dates, pass_rates, colors) if c == "green"]
- # Plot lines for orange, red, and green points
- if orange_points:
- orange_dates, orange_rates = zip(*sorted(orange_points))
- ax.plot(orange_dates, orange_rates, c="orange", alpha=0.5, linewidth=1)
+ # Plot lines for purple, red, and green points
+ if purple_points:
+ purple_dates, purple_rates = zip(*sorted(purple_points))
+ ax.plot(purple_dates, purple_rates, c="purple", alpha=0.5, linewidth=1)
if red_points:
red_dates, red_rates = zip(*sorted(red_points))
ax.plot(red_dates, red_rates, c="red", alpha=0.5, linewidth=1)
From 0a3c6bfbe721eb4765e293f69da6ed759d2aaa5b Mon Sep 17 00:00:00 2001
From: "Paul Gauthier (aider)"
Date: Wed, 14 Aug 2024 06:29:48 -0700
Subject: [PATCH 23/34] feat: Change blue color to light blue in plot_over_time
function
---
benchmark/over_time.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/benchmark/over_time.py b/benchmark/over_time.py
index 12e5dde5a..565038a8e 100644
--- a/benchmark/over_time.py
+++ b/benchmark/over_time.py
@@ -53,7 +53,7 @@ def plot_over_time(yaml_file):
(
"purple"
if "-4o" in model and "gpt-4o-mini" not in model
- else "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "blue"
+ else "red" if "gpt-4" in model else "green" if "gpt-3.5" in model else "lightblue"
)
for model in models
]
From 1ced72b7286de71f6d8aac97c3f038842ef0b52e Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Wed, 14 Aug 2024 06:31:20 -0700
Subject: [PATCH 24/34] update models-over-time
---
aider/website/_data/edit_leaderboard.yml | 12 +
aider/website/assets/models-over-time.svg | 945 ++++++++++++++++------
2 files changed, 722 insertions(+), 235 deletions(-)
diff --git a/aider/website/_data/edit_leaderboard.yml b/aider/website/_data/edit_leaderboard.yml
index 72dd9c96b..426c86336 100644
--- a/aider/website/_data/edit_leaderboard.yml
+++ b/aider/website/_data/edit_leaderboard.yml
@@ -577,6 +577,7 @@
pass_rate_2: 77.4
percent_cases_well_formed: 99.2
error_outputs: 23
+ released: 2024-06-20
num_malformed_responses: 4
num_with_malformed_responses: 1
user_asks: 2
@@ -603,6 +604,7 @@
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 0
+ released: 2024-03-13
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
@@ -644,6 +646,7 @@
commit_hash: d31eef3-dirty
pass_rate_1: 40.6
pass_rate_2: 55.6
+ released: 2024-07-18
percent_cases_well_formed: 100.0
error_outputs: 1
num_malformed_responses: 0
@@ -668,6 +671,7 @@
pass_rate_1: 60.9
pass_rate_2: 69.9
percent_cases_well_formed: 97.7
+ released: 2024-06-28
error_outputs: 58
num_malformed_responses: 13
num_with_malformed_responses: 3
@@ -690,6 +694,7 @@
commit_hash: f7ce78b-dirty
pass_rate_1: 46.6
pass_rate_2: 63.9
+ released: 2024-07-23
percent_cases_well_formed: 92.5
error_outputs: 84
num_malformed_responses: 19
@@ -716,6 +721,7 @@
percent_cases_well_formed: 100.0
error_outputs: 0
num_malformed_responses: 0
+ released: 2024-07-23
num_with_malformed_responses: 0
user_asks: 0
lazy_comments: 0
@@ -738,6 +744,7 @@
pass_rate_2: 72.9
percent_cases_well_formed: 97.7
error_outputs: 13
+ released: 2024-07-24
num_malformed_responses: 3
num_with_malformed_responses: 3
user_asks: 1
@@ -763,6 +770,7 @@
error_outputs: 3
num_malformed_responses: 0
num_with_malformed_responses: 0
+ released: 2024-07-24
user_asks: 3
lazy_comments: 0
syntax_errors: 1
@@ -785,6 +793,7 @@
percent_cases_well_formed: 100.0
error_outputs: 27
num_malformed_responses: 0
+ released: 2024-07-23
num_with_malformed_responses: 0
user_asks: 23
lazy_comments: 8
@@ -810,6 +819,7 @@
num_malformed_responses: 0
num_with_malformed_responses: 0
user_asks: 0
+ released: 2024-07-23
lazy_comments: 0
syntax_errors: 0
indentation_errors: 0
@@ -838,6 +848,7 @@
indentation_errors: 2
exhausted_context_windows: 0
test_timeouts: 5
+ released: 2024-08-06
command: aider --model openai/gpt-4o-2024-08-06
date: 2024-08-06
versions: 0.48.1-dev
@@ -863,6 +874,7 @@
test_timeouts: 0
command: aider --model openai/chatgpt-4o-latest
date: 2024-08-14
+ released: 2024-08-08
versions: 0.50.2-dev
seconds_per_case: 26.3
total_cost: 3.6113
diff --git a/aider/website/assets/models-over-time.svg b/aider/website/assets/models-over-time.svg
index 994dab6ae..a4fe87061 100644
--- a/aider/website/assets/models-over-time.svg
+++ b/aider/website/assets/models-over-time.svg
@@ -1,16 +1,16 @@
-