diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 2aa59c607..823a3582f 100755
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -602,9 +602,6 @@ def summarize_results(dirname, stats_languages=None):
     language_tests = defaultdict(int)
     language_passed = defaultdict(lambda: [0] * tries)
 
-    # Initialize new metrics
-    res.total_api_calls = 0
-
     res.completed_tests = 0
     res.duration = 0
     res.cost = 0
@@ -642,9 +639,6 @@ def summarize_results(dirname, stats_languages=None):
                 for i in range(len(tests_outcomes) - 1, tries):
                     language_passed[language][i] += 1
 
-        # Track API calls
-        res.total_api_calls += results.get("num_api_calls", 0)
-
         res.cost += results.get("cost", 0)
         res.duration += results.get("duration", 0)
         res.test_timeouts += results.get("test_timeouts", 0)
@@ -719,11 +713,6 @@ def summarize_results(dirname, stats_languages=None):
     pct_well_formed = 1.0 - res.num_with_malformed_responses / res.completed_tests
     print(f"  percent_cases_well_formed: {pct_well_formed * 100:.1f}")
 
-    # Display API calls
-    print(f"  total_api_calls: {res.total_api_calls}")
-    if res.completed_tests > 0:
-        print(f"  avg_api_calls_per_test: {res.total_api_calls / res.completed_tests:.2f}")
-
     # Display language-specific pass rates
     if languages:
         # Process language-specific pass rates without breaking YAML format
@@ -1097,9 +1086,6 @@ def run_test_real(
             language = part
             break
 
-    # Calculate the number of API calls from the chat hashes
-    num_api_calls = len(coder.chat_completion_call_hashes)
-
     results = dict(
         testdir=str(testdir),
         testcase=testdir.name,
@@ -1120,7 +1106,6 @@ def run_test_real(
         lazy_comments=lazy_comments,  # Add the count of pattern matches to the results
         reasoning_effort=reasoning_effort,
         thinking_tokens=thinking_tokens,
-        num_api_calls=num_api_calls,  # Add the number of API calls
         chat_hashes=list(
             zip(
                 coder.chat_completion_call_hashes,