copy

2025-05-31 17:55:01 +00:00 · 2024-11-24 14:52:01 -08:00 · 2024-11-24 14:52:01 -08:00 · 86619052ca
commit 86619052ca
parent 0c59d3234e
2 changed files with 45 additions and 40 deletions
--- a/aider/website/_data/quant.yml
+++ b/aider/website/_data/quant.yml
@ -114,29 +114,29 @@
  seconds_per_case: 98.4
  total_cost: 0.0000

- dirname: 2024-11-20-15-17-37--qwen25-32b-or-diff
+- dirname: 2024-11-24-22-18-18--or-all-or-fixed-blank-messages2
  test_cases: 133
  model: "OpenRouter: multiple"
  edit_format: diff
-  commit_hash: e917424
-  pass_rate_1: 49.6
-  pass_rate_2: 65.4
-  percent_cases_well_formed: 84.2
-  error_outputs: 43
-  num_malformed_responses: 31
-  num_with_malformed_responses: 21
-  user_asks: 43
+  commit_hash: 0c59d32
+  pass_rate_1: 57.1
+  pass_rate_2: 67.7
+  percent_cases_well_formed: 95.5
+  error_outputs: 56
+  num_malformed_responses: 10
+  num_with_malformed_responses: 6
+  user_asks: 14
  lazy_comments: 0
-  syntax_errors: 2
-  indentation_errors: 2
-  exhausted_context_windows: 12
-  test_timeouts: 2
+  syntax_errors: 6
+  indentation_errors: 0
+  exhausted_context_windows: 3
+  test_timeouts: 1
  command: aider --model openrouter/qwen/qwen-2.5-coder-32b-instruct
-  date: 2024-11-20
-  versions: 0.63.3.dev
-  seconds_per_case: 40.7
-  total_cost: 0.1497
-
+  date: 2024-11-24
+  versions: 0.64.2.dev
+  seconds_per_case: 21.2
+  total_cost: 0.1420
+  
 - dirname: 2024-11-23-21-08-53--ollama-qwen2.5-coder:32b-instruct-q4_K_M-8kctx
  test_cases: 133
  model: "Ollama: q4_K_M"
@ -252,29 +252,29 @@
  seconds_per_case: 16.1
  total_cost: 0.1391

- dirname: 2024-11-24-14-36-59--qwen25-32b-or-hyperbolic
+- dirname: 2024-11-24-22-03-19--or-hyperbolic-or-fixed-blank-messages2
  test_cases: 133
  model: "Hyperbolic via OpenRouter: BF16"
  edit_format: diff
-  commit_hash: c2f184f
-  pass_rate_1: 40.6
-  pass_rate_2: 46.6
-  percent_cases_well_formed: 83.5
-  error_outputs: 80
-  num_malformed_responses: 73
-  num_with_malformed_responses: 22
-  user_asks: 56
+  commit_hash: 0c59d32
+  pass_rate_1: 55.6
+  pass_rate_2: 68.4
+  percent_cases_well_formed: 89.5
+  error_outputs: 28
+  num_malformed_responses: 24
+  num_with_malformed_responses: 14
+  user_asks: 29
  lazy_comments: 0
-  syntax_errors: 15
+  syntax_errors: 1
  indentation_errors: 0
-  exhausted_context_windows: 5
-  test_timeouts: 0
+  exhausted_context_windows: 4
+  test_timeouts: 1
  command: aider --model openrouter/qwen/qwen-2.5-coder-32b-instruct
  date: 2024-11-24
  versions: 0.64.2.dev
-  seconds_per_case: 110.0
-  total_cost: 0.1763
-
+  seconds_per_case: 41.5
+  total_cost: 0.1402
+  
 - dirname: 2024-11-24-15-00-50--qwen25-32b-or-deepinfra
  test_cases: 133
  model: "Deepinfra via OpenRouter: BF16"
--- a/aider/website/_posts/2024-11-21-quantization.md
+++ b/aider/website/_posts/2024-11-21-quantization.md
@ -44,9 +44,6 @@ when their API is accessed directly.
 - TOC
 {:toc}

-{: .note }
-This article is being updated as additional benchmark runs complete.
-
 ## Benchmark results

 <canvas id="quantChart" width="800" height="600" style="margin: 20px 0"></canvas>
@ -152,7 +149,15 @@ OpenRouter allows you to ignore specific providers in your
 This can be used to limit your OpenRouter requests to be
 served by only your preferred providers.

-{: .note }
-Earlier versions of this article included incorrect Ollama models,
-and also included some Ollama results with the too small default 2k
-context window.
+## Notes
+
+This article went through many revisions as I received feedback from
+numerous members of the community.
+Here are some of the noteworthy lessons and changes:
+
+- The first version of this article included incorrect Ollama models.
+- Earlier Ollama results used the too-small default 2k context window,
+artificially harming the benchmark results.
+- The benchmark results appear to have uncovered a problem in the way
+OpenRouter was communicating with Hyperbolic.
+They fixed the issue on November 24, 2024, shortly after it was pointed out.