diff --git a/_posts/2024-03-08-claude-3.md b/_posts/2024-03-08-claude-3.md
index b1ed14a29..8d71cec99 100644
--- a/_posts/2024-03-08-claude-3.md
+++ b/_posts/2024-03-08-claude-3.md
@@ -1,7 +1,7 @@
 ---
 title: Claude 3 beats GPT-4 on Aider's code editing benchmark
 excerpt: Claude 3 Opus outperforms all of OpenAI's models on Aider's code editing benchmark, making it the best available model for pair programming with AI.
-highlight_image: /assets/2024-03-07-claude-3.svg
+highlight_image: /assets/2024-03-07-claude-3.jpg
 ---
 
 # Claude 3 beats GPT-4 on Aider's code editing benchmark
diff --git a/_posts/2024-04-09-gpt-4-turbo.md b/_posts/2024-04-09-gpt-4-turbo.md
index a9654ee6a..5394ab6a7 100644
--- a/_posts/2024-04-09-gpt-4-turbo.md
+++ b/_posts/2024-04-09-gpt-4-turbo.md
@@ -1,7 +1,7 @@
 ---
 title: GPT-4 Turbo with Vision is a step backwards for coding
 excerpt: OpenAI's GPT-4 Turbo with Vision model scores worse on aider's code editing benchmarks than all the previous GPT-4 models. In particular, it seems much more prone to "lazy coding" than the existing GPT-4 Turbo "preview" models.
-highlight_image: /assets/2024-04-09-gpt-4-turbo-laziness.svg
+highlight_image: /assets/2024-04-09-gpt-4-turbo-laziness.jpg
 ---
 
 # GPT-4 Turbo with Vision is a step backwards for coding
diff --git a/assets/2024-03-07-claude-3.jpg b/assets/2024-03-07-claude-3.jpg
new file mode 100644
index 000000000..2787da95f
Binary files /dev/null and b/assets/2024-03-07-claude-3.jpg differ
diff --git a/assets/2024-04-09-gpt-4-turbo-laziness.jpg b/assets/2024-04-09-gpt-4-turbo-laziness.jpg
new file mode 100644
index 000000000..d2a608927
Binary files /dev/null and b/assets/2024-04-09-gpt-4-turbo-laziness.jpg differ
diff --git a/assets/2024-04-09-gpt-4-turbo.jpg b/assets/2024-04-09-gpt-4-turbo.jpg
new file mode 100644
index 000000000..96ae722bf
Binary files /dev/null and b/assets/2024-04-09-gpt-4-turbo.jpg differ
diff --git a/assets/benchmarks-0125.jpg b/assets/benchmarks-0125.jpg
new file mode 100644
index 000000000..f83d223a5
Binary files /dev/null and b/assets/benchmarks-0125.jpg differ
diff --git a/assets/benchmarks-1106.jpg b/assets/benchmarks-1106.jpg
new file mode 100644
index 000000000..eb487bcb9
Binary files /dev/null and b/assets/benchmarks-1106.jpg differ
diff --git a/assets/benchmarks-speed-1106.jpg b/assets/benchmarks-speed-1106.jpg
new file mode 100644
index 000000000..8407e7bfe
Binary files /dev/null and b/assets/benchmarks-speed-1106.jpg differ
diff --git a/assets/benchmarks.jpg b/assets/benchmarks.jpg
new file mode 100644
index 000000000..d3ad1ecb5
Binary files /dev/null and b/assets/benchmarks.jpg differ
diff --git a/docs/benchmarks-0125.md b/docs/benchmarks-0125.md
index 178c3a010..691558e24 100644
--- a/docs/benchmarks-0125.md
+++ b/docs/benchmarks-0125.md
@@ -1,7 +1,7 @@
 ---
 title: The January GPT-4 Turbo is lazier than the last version
 excerpt: The new `gpt-4-0125-preview` model is quantiatively lazier at coding than previous GPT-4 versions, according to a new "laziness" benchmark.
-highlight_image: /assets/benchmarks-0125.svg
+highlight_image: /assets/benchmarks-0125.jpg
 ---
 
 # The January GPT-4 Turbo is lazier than the last version
diff --git a/docs/benchmarks-1106.md b/docs/benchmarks-1106.md
index 90ae20255..f6e364b58 100644
--- a/docs/benchmarks-1106.md
+++ b/docs/benchmarks-1106.md
@@ -1,7 +1,7 @@
 ---
 title: Code editing benchmarks for OpenAI's "1106" models
 excerpt: A quantitative comparison of the code editing capabilities of the new GPT-3.5 and GPT-4 versions that were released in Nov 2023.
-highlight_image: /assets/benchmarks-1106.svg
+highlight_image: /assets/benchmarks-1106.jpg
 ---
 
 # Code editing benchmarks for OpenAI's "1106" models
diff --git a/docs/benchmarks-speed-1106.md b/docs/benchmarks-speed-1106.md
index b175cc404..b91316652 100644
--- a/docs/benchmarks-speed-1106.md
+++ b/docs/benchmarks-speed-1106.md
@@ -2,7 +2,7 @@
 title: Speed benchmarks of GPT-4 Turbo and gpt-3.5-turbo-1106
 excerpt: This report provides a detailed comparison of the speed of GPT-4 Turbo and gpt-3.5-turbo-1106 models based on the aider benchmarking suite.
 canonical_url: https://aider.chat/2023/11/06/benchmarks-speed-1106.html
-highlight_image: /assets/benchmarks-speed-1106.svg
+highlight_image: /assets/benchmarks-speed-1106.jpg
 ---
 
 # Speed benchmarks of GPT-4 Turbo and gpt-3.5-turbo-1106
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index 5ad53108c..41742f274 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -1,7 +1,7 @@
 ---
 title: GPT code editing benchmarks
 excerpt: Benchmarking GPT-3.5 and GPT-4 code editing skill using a new code editing benchmark suite based on the Exercism python exercises.
-highlight_image: /assets/benchmarks.svg
+highlight_image: /assets/benchmarks.jpg
 ---
 
 # GPT code editing benchmarks