mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-31 09:44:59 +00:00
Remove retry tracking and display from benchmark
This commit is contained in:
parent
d8e511ea2f
commit
cbd744df0e
1 changed file with 0 additions and 15 deletions
|
@ -602,9 +602,6 @@ def summarize_results(dirname, stats_languages=None):
|
||||||
language_tests = defaultdict(int)
|
language_tests = defaultdict(int)
|
||||||
language_passed = defaultdict(lambda: [0] * tries)
|
language_passed = defaultdict(lambda: [0] * tries)
|
||||||
|
|
||||||
# Initialize new metrics
|
|
||||||
res.total_api_calls = 0
|
|
||||||
|
|
||||||
res.completed_tests = 0
|
res.completed_tests = 0
|
||||||
res.duration = 0
|
res.duration = 0
|
||||||
res.cost = 0
|
res.cost = 0
|
||||||
|
@ -642,9 +639,6 @@ def summarize_results(dirname, stats_languages=None):
|
||||||
for i in range(len(tests_outcomes) - 1, tries):
|
for i in range(len(tests_outcomes) - 1, tries):
|
||||||
language_passed[language][i] += 1
|
language_passed[language][i] += 1
|
||||||
|
|
||||||
# Track API calls
|
|
||||||
res.total_api_calls += results.get("num_api_calls", 0)
|
|
||||||
|
|
||||||
res.cost += results.get("cost", 0)
|
res.cost += results.get("cost", 0)
|
||||||
res.duration += results.get("duration", 0)
|
res.duration += results.get("duration", 0)
|
||||||
res.test_timeouts += results.get("test_timeouts", 0)
|
res.test_timeouts += results.get("test_timeouts", 0)
|
||||||
|
@ -719,11 +713,6 @@ def summarize_results(dirname, stats_languages=None):
|
||||||
pct_well_formed = 1.0 - res.num_with_malformed_responses / res.completed_tests
|
pct_well_formed = 1.0 - res.num_with_malformed_responses / res.completed_tests
|
||||||
print(f" percent_cases_well_formed: {pct_well_formed * 100:.1f}")
|
print(f" percent_cases_well_formed: {pct_well_formed * 100:.1f}")
|
||||||
|
|
||||||
# Display API calls
|
|
||||||
print(f" total_api_calls: {res.total_api_calls}")
|
|
||||||
if res.completed_tests > 0:
|
|
||||||
print(f" avg_api_calls_per_test: {res.total_api_calls / res.completed_tests:.2f}")
|
|
||||||
|
|
||||||
# Display language-specific pass rates
|
# Display language-specific pass rates
|
||||||
if languages:
|
if languages:
|
||||||
# Process language-specific pass rates without breaking YAML format
|
# Process language-specific pass rates without breaking YAML format
|
||||||
|
@ -1097,9 +1086,6 @@ def run_test_real(
|
||||||
language = part
|
language = part
|
||||||
break
|
break
|
||||||
|
|
||||||
# Calculate the number of API calls from the chat hashes
|
|
||||||
num_api_calls = len(coder.chat_completion_call_hashes)
|
|
||||||
|
|
||||||
results = dict(
|
results = dict(
|
||||||
testdir=str(testdir),
|
testdir=str(testdir),
|
||||||
testcase=testdir.name,
|
testcase=testdir.name,
|
||||||
|
@ -1120,7 +1106,6 @@ def run_test_real(
|
||||||
lazy_comments=lazy_comments, # Add the count of pattern matches to the results
|
lazy_comments=lazy_comments, # Add the count of pattern matches to the results
|
||||||
reasoning_effort=reasoning_effort,
|
reasoning_effort=reasoning_effort,
|
||||||
thinking_tokens=thinking_tokens,
|
thinking_tokens=thinking_tokens,
|
||||||
num_api_calls=num_api_calls, # Add the number of API calls
|
|
||||||
chat_hashes=list(
|
chat_hashes=list(
|
||||||
zip(
|
zip(
|
||||||
coder.chat_completion_call_hashes,
|
coder.chat_completion_call_hashes,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue