mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-31 01:35:00 +00:00
Merge branch 'main' into qwen3
This commit is contained in:
commit
371c82e5bb
11 changed files with 168 additions and 75 deletions
|
@ -1,17 +1,18 @@
|
|||
# Release history
|
||||
|
||||
### main branch
|
||||
|
||||
- Automatically fetch model parameters (context window, pricing) for OpenRouter models directly from their website, by Stefan Hladnik.
|
||||
- Added support for `gemini-2.5-pro-preview-05-06` models.
|
||||
- Enabled support for `thinking_tokens` and `reasoning_effort` parameters for OpenRouter models.
|
||||
- Added model settings for `openrouter/google/gemini-2.5-pro-preview-03-25`.
|
||||
Introduced `--attribute-co-authored-by` option to add co-author trailer to commit messages, by Andrew Grigorev.
|
||||
- Added `--disable-playwright` flag to prevent Playwright installation prompts and usage, by Andrew Grigorev.
|
||||
- The `aider scrape` command-line tool will now use Playwright for web scraping if it is available, by Jon Keys.
|
||||
- Added repomap support for OCaml and OCaml interface files, by Andrey Popp.
|
||||
- Improved cost calculation using `litellm.completion_cost` where available.
|
||||
- Fixed linter command execution on Windows by adopting `oslex` for argument quoting, by Titusz Pan.
|
||||
- Improved cross-platform display of shell commands by using `oslex` for robust argument quoting, by Titusz Pan.
|
||||
Aider wrote 44% of the code in this release.
|
||||
- Aider wrote 45% of the code in this release.
|
||||
|
||||
### Aider v0.82.3
|
||||
|
||||
|
|
|
@ -108,8 +108,6 @@ class Coder:
|
|||
partial_response_content = ""
|
||||
commit_before_message = []
|
||||
message_cost = 0.0
|
||||
message_tokens_sent = 0
|
||||
message_tokens_received = 0
|
||||
add_cache_headers = False
|
||||
cache_warming_thread = None
|
||||
num_cache_warming_pings = 0
|
||||
|
@ -175,6 +173,8 @@ class Coder:
|
|||
commands=from_coder.commands.clone(),
|
||||
total_cost=from_coder.total_cost,
|
||||
ignore_mentions=from_coder.ignore_mentions,
|
||||
total_tokens_sent=from_coder.total_tokens_sent,
|
||||
total_tokens_received=from_coder.total_tokens_received,
|
||||
file_watcher=from_coder.file_watcher,
|
||||
)
|
||||
use_kwargs.update(update) # override to complete the switch
|
||||
|
@ -327,6 +327,8 @@ class Coder:
|
|||
chat_language=None,
|
||||
detect_urls=True,
|
||||
ignore_mentions=None,
|
||||
total_tokens_sent=0,
|
||||
total_tokens_received=0,
|
||||
file_watcher=None,
|
||||
auto_copy_context=False,
|
||||
auto_accept_architect=True,
|
||||
|
@ -373,6 +375,10 @@ class Coder:
|
|||
self.need_commit_before_edits = set()
|
||||
|
||||
self.total_cost = total_cost
|
||||
self.total_tokens_sent = total_tokens_sent
|
||||
self.total_tokens_received = total_tokens_received
|
||||
self.message_tokens_sent = 0
|
||||
self.message_tokens_received = 0
|
||||
|
||||
self.verbose = verbose
|
||||
self.abs_fnames = set()
|
||||
|
@ -1989,7 +1995,7 @@ class Coder:
|
|||
try:
|
||||
# Try and use litellm's built in cost calculator. Seems to work for non-streaming only?
|
||||
cost = litellm.completion_cost(completion_response=completion)
|
||||
except ValueError:
|
||||
except Exception:
|
||||
cost = 0
|
||||
|
||||
if not cost:
|
||||
|
@ -2057,6 +2063,9 @@ class Coder:
|
|||
if not self.usage_report:
|
||||
return
|
||||
|
||||
self.total_tokens_sent += self.message_tokens_sent
|
||||
self.total_tokens_received += self.message_tokens_received
|
||||
|
||||
self.io.tool_output(self.usage_report)
|
||||
|
||||
prompt_tokens = self.message_tokens_sent
|
||||
|
|
|
@ -91,8 +91,8 @@ MODEL_ALIASES = {
|
|||
"flash": "gemini/gemini-2.5-flash-preview-04-17",
|
||||
"quasar": "openrouter/openrouter/quasar-alpha",
|
||||
"r1": "deepseek/deepseek-reasoner",
|
||||
"gemini-2.5-pro": "gemini/gemini-2.5-pro-exp-03-25",
|
||||
"gemini": "gemini/gemini-2.5-pro-preview-03-25",
|
||||
"gemini-2.5-pro": "gemini/gemini-2.5-pro-preview-05-06",
|
||||
"gemini": "gemini/gemini-2.5-pro-preview-05-06",
|
||||
"gemini-exp": "gemini/gemini-2.5-pro-exp-03-25",
|
||||
"grok3": "xai/grok-3-beta",
|
||||
"optimus": "openrouter/openrouter/optimus-alpha",
|
||||
|
@ -231,9 +231,58 @@ class ModelInfoManager:
|
|||
if litellm_info:
|
||||
return litellm_info
|
||||
|
||||
if not cached_info and model.startswith("openrouter/"):
|
||||
openrouter_info = self.fetch_openrouter_model_info(model)
|
||||
if openrouter_info:
|
||||
return openrouter_info
|
||||
|
||||
return cached_info
|
||||
|
||||
|
||||
def fetch_openrouter_model_info(self, model):
|
||||
"""
|
||||
Fetch model info by scraping the openrouter model page.
|
||||
Expected URL: https://openrouter.ai/<model_route>
|
||||
Example: openrouter/qwen/qwen-2.5-72b-instruct:free
|
||||
Returns a dict with keys: max_tokens, max_input_tokens, max_output_tokens, input_cost_per_token, output_cost_per_token.
|
||||
"""
|
||||
url_part = model[len("openrouter/"):]
|
||||
url = "https://openrouter.ai/" + url_part
|
||||
try:
|
||||
import requests
|
||||
response = requests.get(url, timeout=5, verify=self.verify_ssl)
|
||||
if response.status_code != 200:
|
||||
return {}
|
||||
html = response.text
|
||||
import re
|
||||
if re.search(rf'The model\s*.*{re.escape(url_part)}.* is not available', html, re.IGNORECASE):
|
||||
print(f"\033[91mError: Model '{url_part}' is not available\033[0m")
|
||||
return {}
|
||||
text = re.sub(r'<[^>]+>', ' ', html)
|
||||
context_match = re.search(r"([\d,]+)\s*context", text)
|
||||
if context_match:
|
||||
context_str = context_match.group(1).replace(",", "")
|
||||
context_size = int(context_str)
|
||||
else:
|
||||
context_size = None
|
||||
input_cost_match = re.search(r"\$\s*([\d.]+)\s*/M input tokens", text, re.IGNORECASE)
|
||||
output_cost_match = re.search(r"\$\s*([\d.]+)\s*/M output tokens", text, re.IGNORECASE)
|
||||
input_cost = float(input_cost_match.group(1)) / 1000000 if input_cost_match else None
|
||||
output_cost = float(output_cost_match.group(1)) / 1000000 if output_cost_match else None
|
||||
if context_size is None or input_cost is None or output_cost is None:
|
||||
return {}
|
||||
params = {
|
||||
"max_input_tokens": context_size,
|
||||
"max_tokens": context_size,
|
||||
"max_output_tokens": context_size,
|
||||
"input_cost_per_token": input_cost,
|
||||
"output_cost_per_token": output_cost,
|
||||
}
|
||||
return params
|
||||
except Exception as e:
|
||||
print("Error fetching openrouter info:", str(e))
|
||||
return {}
|
||||
|
||||
model_info_manager = ModelInfoManager()
|
||||
|
||||
|
||||
|
|
|
@ -1375,17 +1375,17 @@
|
|||
- name: gemini/gemini-2.5-flash-preview-04-17
|
||||
edit_format: diff
|
||||
use_repo_map: true
|
||||
accepts_settings: ["thinking_tokens"]
|
||||
accepts_settings: ["reasoning_effort", "thinking_tokens"]
|
||||
|
||||
- name: gemini-2.5-flash-preview-04-17
|
||||
edit_format: diff
|
||||
use_repo_map: true
|
||||
accepts_settings: ["thinking_tokens"]
|
||||
accepts_settings: ["reasoning_effort", "thinking_tokens"]
|
||||
|
||||
- name: vertex_ai-language-models/gemini-2.5-flash-preview-04-17
|
||||
edit_format: diff
|
||||
use_repo_map: true
|
||||
accepts_settings: ["thinking_tokens"]
|
||||
accepts_settings: ["reasoning_effort", "thinking_tokens"]
|
||||
|
||||
- name: openrouter/google/gemini-2.5-pro-preview-03-25
|
||||
overeager: true
|
||||
|
@ -1397,7 +1397,7 @@
|
|||
overeager: true
|
||||
edit_format: diff-fenced
|
||||
use_repo_map: true
|
||||
weak_model_name: gemini/gemini-2.0-flash
|
||||
weak_model_name: gemini/gemini-2.5-flash-preview-04-17
|
||||
|
||||
- name: vertex_ai/gemini-2.5-pro-preview-05-06
|
||||
edit_format: diff-fenced
|
||||
|
|
|
@ -25,17 +25,18 @@ cog.out(text)
|
|||
|
||||
|
||||
### main branch
|
||||
|
||||
- Automatically fetch model parameters (context window, pricing) for OpenRouter models directly from their website, by Stefan Hladnik.
|
||||
- Added support for `gemini-2.5-pro-preview-05-06` models.
|
||||
- Enabled support for `thinking_tokens` and `reasoning_effort` parameters for OpenRouter models.
|
||||
- Added model settings for `openrouter/google/gemini-2.5-pro-preview-03-25`.
|
||||
Introduced `--attribute-co-authored-by` option to add co-author trailer to commit messages, by Andrew Grigorev.
|
||||
- Added `--disable-playwright` flag to prevent Playwright installation prompts and usage, by Andrew Grigorev.
|
||||
- The `aider scrape` command-line tool will now use Playwright for web scraping if it is available, by Jon Keys.
|
||||
- Added repomap support for OCaml and OCaml interface files, by Andrey Popp.
|
||||
- Improved cost calculation using `litellm.completion_cost` where available.
|
||||
- Fixed linter command execution on Windows by adopting `oslex` for argument quoting, by Titusz Pan.
|
||||
- Improved cross-platform display of shell commands by using `oslex` for robust argument quoting, by Titusz Pan.
|
||||
Aider wrote 44% of the code in this release.
|
||||
- Aider wrote 45% of the code in this release.
|
||||
|
||||
### Aider v0.82.3
|
||||
|
||||
|
|
|
@ -1225,3 +1225,28 @@
|
|||
seconds_per_case: 50.1
|
||||
total_cost: 1.8451
|
||||
|
||||
- dirname: 2025-05-07-19-32-40--gemini0506-diff-fenced-completion_cost
|
||||
test_cases: 225
|
||||
model: Gemini 2.5 Pro Preview 05-06
|
||||
edit_format: diff-fenced
|
||||
commit_hash: 3b08327-dirty
|
||||
pass_rate_1: 36.4
|
||||
pass_rate_2: 76.9
|
||||
pass_num_1: 82
|
||||
pass_num_2: 173
|
||||
percent_cases_well_formed: 97.3
|
||||
error_outputs: 15
|
||||
num_malformed_responses: 7
|
||||
num_with_malformed_responses: 6
|
||||
user_asks: 105
|
||||
lazy_comments: 0
|
||||
syntax_errors: 0
|
||||
indentation_errors: 0
|
||||
exhausted_context_windows: 0
|
||||
test_timeouts: 2
|
||||
total_tests: 225
|
||||
command: aider --model gemini/gemini-2.5-pro-preview-05-06
|
||||
date: 2025-05-07
|
||||
versions: 0.82.4.dev
|
||||
seconds_per_case: 165.3
|
||||
total_cost: 37.4104
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
title: Gemini 2.5 Pro Preview 03-25 benchmark cost
|
||||
excerpt: The $6.32 benchmark cost reported for Gemini 2.5 Pro Preview 03-25 was incorrect.
|
||||
draft: true
|
||||
draft: false
|
||||
nav_exclude: true
|
||||
---
|
||||
{% if page.date %}
|
||||
|
@ -41,7 +41,7 @@ completed the benchmark at a cost of about $37.
|
|||
|
||||
## Investigation detail
|
||||
|
||||
The version of litellm available at that time appears to have been
|
||||
The version of litellm available at that time of the benchmark appears to have been
|
||||
excluding reasoning tokens from the token counts it reported.
|
||||
So even though aider had correct per-token pricing, it did not have the correct token counts
|
||||
used during the benchmark.
|
||||
|
|
|
@ -1,57 +1,3 @@
|
|||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190053}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple", "prompt_tokens": 16858, "completion_tokens": 143, "total_tokens": 17001, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190062}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190237}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple", "prompt_tokens": 17173, "completion_tokens": 312, "total_tokens": 17485, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190243}
|
||||
{"event": "command_exit", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190249}
|
||||
{"event": "exit", "properties": {"reason": "/exit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190249}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190260}
|
||||
{"event": "repo", "properties": {"num_files": 615}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190262}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190262}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190262}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-flash-preview-04-17", "weak_model": "gemini/gemini-2.5-flash-preview-04-17", "editor_model": "gemini/gemini-2.5-flash-preview-04-17", "edit_format": "diff", "prompt_tokens": 10189, "completion_tokens": 116, "total_tokens": 10305, "cost": 0.0015979499999999999, "total_cost": 0.0015979499999999999}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190272}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190955}
|
||||
{"event": "repo", "properties": {"num_files": 615}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190955}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190955}
|
||||
{"event": "cli session", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190955}
|
||||
{"event": "command_edit", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745190964}
|
||||
{"event": "command_ask", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191011}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191011}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "ask", "prompt_tokens": 8442, "completion_tokens": 417, "total_tokens": 8859, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191025}
|
||||
{"event": "command_ask", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191054}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191054}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191127}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple", "prompt_tokens": 14287, "completion_tokens": 1995, "total_tokens": 16282, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191145}
|
||||
{"event": "command_clear", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191182}
|
||||
{"event": "command_ask", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191214}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191214}
|
||||
{"event": "command_clear", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191224}
|
||||
{"event": "command_edit", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191228}
|
||||
{"event": "command_ask", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191286}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191286}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "ask", "prompt_tokens": 8481, "completion_tokens": 1848, "total_tokens": 10329, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191334}
|
||||
{"event": "command_clear", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191533}
|
||||
{"event": "ai-comments execute", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191535}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191535}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple", "prompt_tokens": 8611, "completion_tokens": 89, "total_tokens": 8700, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745191553}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192191}
|
||||
{"event": "repo", "properties": {"num_files": 615}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192191}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192191}
|
||||
{"event": "exit", "properties": {"reason": "Completed lint/test/commit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192196}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192301}
|
||||
{"event": "no-repo", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192301}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192301}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192301}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple", "prompt_tokens": 7873, "completion_tokens": 147, "total_tokens": 8020, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192314}
|
||||
{"event": "exit", "properties": {"reason": "Completed --message"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192314}
|
||||
{"event": "ai-comments file-add", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192315}
|
||||
{"event": "ai-comments file-add", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192318}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192612}
|
||||
{"event": "repo", "properties": {"num_files": 615}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192612}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192612}
|
||||
{"event": "exit", "properties": {"reason": "Completed lint/test/commit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745192615}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745194554}
|
||||
{"event": "repo", "properties": {"num_files": 615}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745194556}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745194556}
|
||||
{"event": "cli session", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745194556}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745194568}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1745194656}
|
||||
|
@ -998,3 +944,57 @@
|
|||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746664482}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple", "prompt_tokens": 19959, "completion_tokens": 176, "total_tokens": 20135, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746664554}
|
||||
{"event": "exit", "properties": {"reason": "Completed --message"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746664554}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666729}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666730}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666730}
|
||||
{"event": "exit", "properties": {"reason": "Completed lint/test/commit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666733}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666821}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666822}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666822}
|
||||
{"event": "exit", "properties": {"reason": "Completed lint/test/commit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666825}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666956}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666956}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666956}
|
||||
{"event": "cli session", "properties": {"main_model": "gemini/gemini-2.0-flash", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.0-flash", "edit_format": "diff"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666956}
|
||||
{"event": "exit", "properties": {"reason": "Completed main CLI coder.run"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666960}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666962}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666962}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666962}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666966}
|
||||
{"event": "model warning", "properties": {"main_model": "gemini/REDACTED", "weak_model": "gemini/REDACTED", "editor_model": "gemini/REDACTED"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666968}
|
||||
{"event": "exit", "properties": {"reason": "Keyboard interrupt during model warnings"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666988}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666997}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666997}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666997}
|
||||
{"event": "cli session", "properties": {"main_model": "gemini/gemini-2.5-flash-preview-04-17", "weak_model": "gemini/gemini-2.5-flash-preview-04-17", "editor_model": "gemini/gemini-2.5-flash-preview-04-17", "edit_format": "diff"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666997}
|
||||
{"event": "command_exit", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666999}
|
||||
{"event": "exit", "properties": {"reason": "/exit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746666999}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667009}
|
||||
{"event": "model warning", "properties": {"main_model": "openrouter/REDACTED", "weak_model": "openrouter/REDACTED", "editor_model": "openrouter/REDACTED"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667011}
|
||||
{"event": "exit", "properties": {"reason": "Keyboard interrupt during model warnings"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667027}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667033}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667033}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667033}
|
||||
{"event": "exit", "properties": {"reason": "Completed lint/test/commit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667036}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667201}
|
||||
{"event": "model warning", "properties": {"main_model": "openrouter/REDACTED", "weak_model": "openrouter/REDACTED", "editor_model": "openrouter/REDACTED"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667203}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667207}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667207}
|
||||
{"event": "cli session", "properties": {"main_model": "openrouter/REDACTED", "weak_model": "openrouter/REDACTED", "editor_model": "openrouter/REDACTED", "edit_format": "whole"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667207}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667209}
|
||||
{"event": "command_exit", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667229}
|
||||
{"event": "exit", "properties": {"reason": "/exit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667229}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667436}
|
||||
{"event": "repo", "properties": {"num_files": 621}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667439}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667439}
|
||||
{"event": "cli session", "properties": {"main_model": "openrouter/REDACTED", "weak_model": "openrouter/REDACTED", "editor_model": "openrouter/REDACTED", "edit_format": "whole"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667439}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667441}
|
||||
{"event": "message_send", "properties": {"main_model": "openrouter/REDACTED", "weak_model": "openrouter/REDACTED", "editor_model": "openrouter/REDACTED", "edit_format": "whole", "prompt_tokens": 9655, "completion_tokens": 395, "total_tokens": 10050, "cost": 0.00168525, "total_cost": 0.00168525}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667458}
|
||||
{"event": "command_exit", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667467}
|
||||
{"event": "exit", "properties": {"reason": "/exit"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667467}
|
||||
{"event": "launched", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667540}
|
||||
{"event": "no-repo", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667540}
|
||||
{"event": "auto_commits", "properties": {"enabled": true}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667540}
|
||||
{"event": "message_send_starting", "properties": {}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667540}
|
||||
{"event": "message_send", "properties": {"main_model": "gemini/gemini-2.5-pro-exp-03-25", "weak_model": "gemini/gemini-2.0-flash", "editor_model": "gemini/gemini-2.5-pro-exp-03-25", "edit_format": "udiff-simple", "prompt_tokens": 23450, "completion_tokens": 234, "total_tokens": 23684, "cost": 0, "total_cost": 0.0}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667620}
|
||||
{"event": "exit", "properties": {"reason": "Completed --message"}, "user_id": "c42c4e6b-f054-44d7-ae1f-6726cc41da88", "time": 1746667620}
|
||||
|
|
|
@ -694,7 +694,7 @@ cog.out("```\n")
|
|||
|
||||
- name: gemini/gemini-2.5-pro-preview-05-06
|
||||
edit_format: diff-fenced
|
||||
weak_model_name: gemini/gemini-2.0-flash
|
||||
weak_model_name: gemini/gemini-2.5-flash-preview-04-17
|
||||
use_repo_map: true
|
||||
overeager: true
|
||||
|
||||
|
|
|
@ -264,13 +264,12 @@ tr:hover { background-color: #f5f5f5; }
|
|||
</style>
|
||||
<table>
|
||||
<tr><th>Model Name</th><th class='right'>Total Tokens</th><th class='right'>Percent</th></tr>
|
||||
<tr><td>gemini/gemini-2.5-pro-exp-03-25</td><td class='right'>1,332,231</td><td class='right'>67.3%</td></tr>
|
||||
<tr><td>gemini/gemini-2.5-pro-preview-03-25</td><td class='right'>442,019</td><td class='right'>22.3%</td></tr>
|
||||
<tr><td>o3</td><td class='right'>138,842</td><td class='right'>7.0%</td></tr>
|
||||
<tr><td>gemini/gemini-2.5-pro-exp-03-25</td><td class='right'>1,269,239</td><td class='right'>66.2%</td></tr>
|
||||
<tr><td>gemini/gemini-2.5-pro-preview-03-25</td><td class='right'>442,019</td><td class='right'>23.1%</td></tr>
|
||||
<tr><td>o3</td><td class='right'>138,842</td><td class='right'>7.2%</td></tr>
|
||||
<tr><td>openrouter/anthropic/claude-3.7-sonnet</td><td class='right'>41,017</td><td class='right'>2.1%</td></tr>
|
||||
<tr><td>gemini/gemini-2.5-flash-preview-04-17</td><td class='right'>10,305</td><td class='right'>0.5%</td></tr>
|
||||
<tr><td>openrouter/REDACTED</td><td class='right'>15,587</td><td class='right'>0.8%</td></tr>
|
||||
<tr><td>gemini/gemini-2.5-pro-preview-05-06</td><td class='right'>8,576</td><td class='right'>0.4%</td></tr>
|
||||
<tr><td>openrouter/REDACTED</td><td class='right'>5,537</td><td class='right'>0.3%</td></tr>
|
||||
<tr><td>gemini/REDACTED</td><td class='right'>1,989</td><td class='right'>0.1%</td></tr>
|
||||
</table>
|
||||
|
||||
|
|
|
@ -492,6 +492,8 @@ def summarize_results(dirname, stats_languages=None):
|
|||
res.syntax_errors = 0
|
||||
res.indentation_errors = 0
|
||||
res.lazy_comments = 0
|
||||
res.prompt_tokens = 0
|
||||
res.completion_tokens = 0
|
||||
|
||||
res.reasoning_effort = None
|
||||
res.thinking_tokens = None
|
||||
|
@ -523,6 +525,9 @@ def summarize_results(dirname, stats_languages=None):
|
|||
res.syntax_errors += results.get("syntax_errors", 0)
|
||||
res.indentation_errors += results.get("indentation_errors", 0)
|
||||
|
||||
res.prompt_tokens += results.get("prompt_tokens", 0)
|
||||
res.completion_tokens += results.get("completion_tokens", 0)
|
||||
|
||||
res.reasoning_effort = results.get("reasoning_effort")
|
||||
res.thinking_tokens = results.get("thinking_tokens")
|
||||
|
||||
|
@ -590,6 +595,8 @@ def summarize_results(dirname, stats_languages=None):
|
|||
show("syntax_errors")
|
||||
show("indentation_errors")
|
||||
show("exhausted_context_windows")
|
||||
show("prompt_tokens", red=None)
|
||||
show("completion_tokens", red=None)
|
||||
show("test_timeouts")
|
||||
print(f" total_tests: {res.total_tests}")
|
||||
|
||||
|
@ -950,6 +957,8 @@ def run_test_real(
|
|||
indentation_errors=indentation_errors,
|
||||
lazy_comments=lazy_comments, # Add the count of pattern matches to the results
|
||||
reasoning_effort=reasoning_effort,
|
||||
prompt_tokens=coder.total_tokens_sent,
|
||||
completion_tokens=coder.total_tokens_received,
|
||||
thinking_tokens=thinking_tokens,
|
||||
chat_hashes=list(
|
||||
zip(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue