mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-29 00:35:00 +00:00
Feat: Add --reasoning-effort switch to benchmark script
This commit is contained in:
parent
50fd544070
commit
8e689d35af
1 changed file with 17 additions and 0 deletions
|
@ -206,6 +206,9 @@ def main(
|
|||
read_model_settings: str = typer.Option(
|
||||
None, "--read-model-settings", help="Load aider model settings from YAML file"
|
||||
),
|
||||
reasoning_effort: Optional[float] = typer.Option(
|
||||
None, "--reasoning-effort", help="Set reasoning effort for models that support it"
|
||||
),
|
||||
exercises_dir: str = typer.Option(
|
||||
EXERCISES_DIR_DEFAULT, "--exercises-dir", help="Directory with exercise files"
|
||||
),
|
||||
|
@ -362,6 +365,7 @@ def main(
|
|||
editor_edit_format,
|
||||
num_ctx,
|
||||
sleep,
|
||||
reasoning_effort,
|
||||
)
|
||||
|
||||
all_results.append(results)
|
||||
|
@ -384,6 +388,7 @@ def main(
|
|||
replay,
|
||||
editor_model,
|
||||
editor_edit_format,
|
||||
reasoning_effort,
|
||||
)
|
||||
all_results = run_test_threaded.gather(tqdm=True)
|
||||
|
||||
|
@ -481,6 +486,7 @@ def summarize_results(dirname, stats_languages=None):
|
|||
res.indentation_errors = 0
|
||||
res.lazy_comments = 0
|
||||
|
||||
res.reasoning_effort = None
|
||||
variants = defaultdict(set)
|
||||
|
||||
for results in all_results:
|
||||
|
@ -509,7 +515,10 @@ def summarize_results(dirname, stats_languages=None):
|
|||
res.syntax_errors += results.get("syntax_errors", 0)
|
||||
res.indentation_errors += results.get("indentation_errors", 0)
|
||||
|
||||
res.reasoning_effort = results.get("reasoning_effort")
|
||||
|
||||
for key in "model edit_format commit_hash editor_model editor_edit_format".split():
|
||||
|
||||
val = results.get(key)
|
||||
if val:
|
||||
variants[key].add(val)
|
||||
|
@ -552,6 +561,9 @@ def summarize_results(dirname, stats_languages=None):
|
|||
setattr(res, key, val)
|
||||
console.print(f" {key}: {val}", style=style)
|
||||
|
||||
if res.reasoning_effort is not None:
|
||||
print(f" reasoning_effort: {res.reasoning_effort}")
|
||||
|
||||
for i in range(tries):
|
||||
print(f" pass_rate_{i + 1}: {percents[i]:.1f}")
|
||||
for i in range(tries):
|
||||
|
@ -663,6 +675,7 @@ def run_test_real(
|
|||
editor_edit_format,
|
||||
num_ctx=None,
|
||||
sleep=0,
|
||||
reasoning_effort=None,
|
||||
read_model_settings=None,
|
||||
):
|
||||
if not os.path.isdir(testdir):
|
||||
|
@ -769,6 +782,9 @@ def run_test_real(
|
|||
editor_edit_format=editor_edit_format,
|
||||
)
|
||||
|
||||
if reasoning_effort is not None:
|
||||
main_model.set_reasoning_effort(reasoning_effort)
|
||||
|
||||
dump(main_model.max_chat_history_tokens)
|
||||
|
||||
if num_ctx:
|
||||
|
@ -919,6 +935,7 @@ def run_test_real(
|
|||
syntax_errors=syntax_errors,
|
||||
indentation_errors=indentation_errors,
|
||||
lazy_comments=lazy_comments, # Add the count of pattern matches to the results
|
||||
reasoning_effort=reasoning_effort,
|
||||
chat_hashes=list(
|
||||
zip(
|
||||
coder.chat_completion_call_hashes,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue