From 35fed777db4236d6f5ecdc4ab8754223ed4857b4 Mon Sep 17 00:00:00 2001
From: AJ <yspdev@gmail.com>
Date: Thu, 24 Apr 2025 19:53:06 -0700
Subject: [PATCH] benchmark: print only pass_rate_2 per language, record num_api_calls

---
 benchmark/benchmark.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index f8a294f19..6c64446c6 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -609,15 +609,17 @@ def summarize_results(dirname, stats_languages=None):
 
     # Display language-specific pass rates
     if languages:
-        print("\n  Language-specific pass rates:")
+        # Process language-specific pass rates without breaking YAML format
         for language in sorted(languages):
-            for i in range(tries):
-                if language_tests[language] > 0:
+            if language_tests[language] > 0:
+                # Only print pass rate 2 for each language
+                if tries >= 2:  # Make sure we have at least 2 tries
+                    i = 1  # Index for pass_rate_2 (0-based index)
                     lang_pass_rate = 100 * language_passed[language][i] / language_tests[language]
-                    print(f"    {language}_pass_rate_{i + 1}: {lang_pass_rate:.1f}")
-                    # Store in the result object for potential use in graphs
-                    setattr(res, f"{language}_pass_rate_{i + 1}", f"{lang_pass_rate:.1f}")
-                    setattr(res, f"{language}_pass_num_{i + 1}", language_passed[language][i])
+                    print(f"  {language}_pass_rate_2: {lang_pass_rate:.1f}")
+                    # Store the try-2 stats and test count on res for potential use in graphs
+                    setattr(res, f"{language}_pass_rate_2", f"{lang_pass_rate:.1f}")
+                    setattr(res, f"{language}_pass_num_2", language_passed[language][i])
                     setattr(res, f"{language}_tests", language_tests[language])
 
     show("error_outputs")
@@ -977,6 +979,9 @@ def run_test_real(
             language = part
             break
 
+    # Calculate the number of API calls from the chat hashes
+    num_api_calls = len(coder.chat_completion_call_hashes)
+
     results = dict(
         testdir=str(testdir),
         testcase=testdir.name,
@@ -997,6 +1002,7 @@ def run_test_real(
         lazy_comments=lazy_comments,  # Add the count of pattern matches to the results
         reasoning_effort=reasoning_effort,
         thinking_tokens=thinking_tokens,
+        num_api_calls=num_api_calls,  # Add the number of API calls
         chat_hashes=list(
             zip(
                 coder.chat_completion_call_hashes,