aider/local_analytics/local_analytics_collector.py
flanker e8bee42d76 feat: Local Analytics Dashboard for Aider
feat: Initialize LocalAnalyticsCollector in main.py

feat: Display session data in local analytics dashboard

fix: Use cumulative data from last interaction for dashboard stats

fix: Extract initial query from diffs in local analytics collector.
2025-05-14 20:39:13 +05:30

351 lines
15 KiB
Python

# aider/local_analytics_collector.py
import atexit
import datetime
import json
import logging
import os
import platform
import re
import shelve
import sys
import time
import uuid

import litellm

# Import from the local_analytics package (assuming project_root/local_analytics/dashboard_generator.py)
from local_analytics.dashboard_generator import main

try:
    from aider import __version__ as aider_version_val
except ImportError:
    aider_version_val = "unknown"
# Analytics artifact paths. Each is joined onto the collector's base directory
# (the git root when one is given, otherwise the current working directory).

# `shelve` database holding the list of recorded interactions.
DATA_SHELVE_FILE = "local_analytics/aider_analytics_data.shelve"
# Debug log written by the collector's file logger.
LOG_FILE = "local_analytics/local_analytics_collector.logs"
# JSON Lines export of the stored interactions, rewritten when a session ends.
SESSION_JSONL_FILE = "local_analytics/session.jsonl"
class LocalAnalyticsCollector:
    """
    Collects local analytics data for Aider sessions and interactions.

    One dict is recorded per interaction (query, timing, per-call LLM token
    usage and cost, commits) and appended to a `shelve` database under the
    project directory. When the session ends, the stored interactions are
    exported to a JSON Lines file and the dashboard generator is invoked.
    """

    # Marker that separates the user's own text from diffs appended to a query.
    _DIFF_FENCE = "```diff"

    def __init__(self, io, git_root=None, enabled=True) -> None:
        """
        Initializes the LocalAnalyticsCollector.

        Args:
            io: An InputOutput object; only a reference is kept.
            git_root (str, optional): The root directory of the git project.
                Defaults to None, in which case the current working directory is used.
            enabled (bool, optional): Whether analytics collection is enabled.
                Defaults to True. When False, nothing else is set up and every
                public method becomes a no-op.
        """
        self.io = io
        self.enabled = enabled
        if not self.enabled:
            return

        base_path = git_root if git_root else os.getcwd()
        self.project_name = os.path.basename(os.path.abspath(base_path))

        self.data_file = os.path.join(base_path, DATA_SHELVE_FILE)
        self.log_file = os.path.join(base_path, LOG_FILE)
        self.session_jsonl_file = os.path.join(base_path, SESSION_JSONL_FILE)

        self.session_id = str(uuid.uuid4())
        self.aider_version = aider_version_val
        self.platform_info = platform.platform()
        self.python_version = (
            f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
        )

        self._current_interaction_data = None
        self._interaction_start_time_monotonic = None

        # The log directory must exist before the FileHandler below opens the file.
        for path in (self.log_file, self.data_file, self.session_jsonl_file):
            directory = os.path.dirname(path)
            if directory:
                os.makedirs(directory, exist_ok=True)

        self.logger = logging.getLogger(__name__ + ".LocalAnalyticsCollector")
        self.logger.setLevel(logging.DEBUG)
        self.logger.propagate = False  # Keep analytics logs off the root logger / console.

        # The logger is shared per process; drop handlers left by an earlier
        # instance so records are not duplicated.
        for handler in self.logger.handlers[:]:
            self.logger.removeHandler(handler)
            handler.close()

        file_handler = logging.FileHandler(self.log_file, encoding="utf-8")
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s - %(levelname)s - %(session_id)s - %(message)s")
        )
        self.logger.addHandler(file_handler)

        # The adapter injects session_id, which the formatter above requires.
        self._log_adapter = logging.LoggerAdapter(self.logger, {"session_id": self.session_id})
        self._log_adapter.debug("--- LocalAnalyticsCollector Initialized ---")
        self._log_adapter.debug("Project: %s", self.project_name)
        self._log_adapter.debug("Data file: %s", self.data_file)
        self._log_adapter.debug("Log file: %s", self.log_file)
        self._log_adapter.debug("Session JSONL file: %s", self.session_jsonl_file)

        atexit.register(self.end_session)

        # Snapshots of the callback lists as they were before registration.
        # Kept for inspection only: end_session() removes just this
        # collector's callback instead of restoring these lists.
        self._original_success_callbacks = litellm.success_callback[:]
        self._original_failure_callbacks = litellm.failure_callback[:]
        if self._litellm_success_callback not in litellm.success_callback:
            litellm.success_callback.append(self._litellm_success_callback)

    def _log_event(self, level, message, *args) -> None:
        """Log through the session adapter; print errors if no logger was set up."""
        adapter = getattr(self, "_log_adapter", None)
        if adapter is not None:
            adapter.log(level, message, *args)
        elif level >= logging.ERROR:  # pragma: no cover
            print(message % args)

    def start_interaction(self, query, modified_files_in_chat=None) -> None:
        """
        Starts tracking a new interaction.

        If a previous interaction was in progress, it is ended (and saved) first.

        Args:
            query (str): The user's query for this interaction. Only the text
                before the first "```diff" marker is stored; None is treated
                as an empty query.
            modified_files_in_chat (list, optional): A list of files modified
                in the chat context. Defaults to None (stored as an empty list).
        """
        if not self.enabled:
            return
        if self._current_interaction_data:
            self.end_interaction()

        # str.partition keeps everything before the first marker, matching the
        # former re.split(..., 1)[0] without the deprecated positional maxsplit.
        query_text = (query or "").partition(self._DIFF_FENCE)[0].strip()

        self._interaction_start_time_monotonic = time.monotonic()
        self._current_interaction_data = {
            "session_id": self.session_id,
            "project_name": self.project_name,
            "interaction_timestamp": datetime.datetime.now().isoformat(),
            "interaction_duration_seconds": 0,
            "query": query_text,
            "aider_version": self.aider_version,
            "platform_info": self.platform_info,
            "python_version": self.python_version,
            "token_summary": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0,
                "estimated_cost": 0.0,
            },
            "models_used_summary": [],
            "llm_calls_details": [],
            "modified_files_in_chat": modified_files_in_chat or [],
            "commits_made_this_interaction": [],
        }

    def end_interaction(self) -> None:
        """
        Ends the current interaction and saves its data.

        Calculates the interaction duration, summarizes per-model usage from
        the recorded LLM calls, and appends the interaction to the shelve
        database. Save errors are logged, not raised; the in-memory
        interaction is cleared either way.
        """
        if not self.enabled or not self._current_interaction_data:
            return

        data = self._current_interaction_data
        if self._interaction_start_time_monotonic is not None:
            data["interaction_duration_seconds"] = (
                time.monotonic() - self._interaction_start_time_monotonic
            )

        # Aggregate the detailed call records into one entry per model.
        per_model = {}
        for call in data.get("llm_calls_details", []):
            model_name = call.get("model", "unknown_model")
            entry = per_model.setdefault(
                model_name,
                {
                    "name": model_name,
                    "calls": 0,
                    "cost": 0.0,
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                },
            )
            entry["calls"] += 1
            entry["cost"] += call.get("cost", 0.0)
            entry["prompt_tokens"] += call.get("prompt_tokens", 0)
            entry["completion_tokens"] += call.get("completion_tokens", 0)
        data["models_used_summary"] = list(per_model.values())

        try:
            with shelve.open(self.data_file) as db:
                # Re-assign the list: shelve does not persist in-place
                # mutation of stored values.
                interactions = db.get("interactions", [])
                interactions.append(data)
                db["interactions"] = interactions
        except Exception as e:
            self._log_event(logging.ERROR, "Error saving interaction to shelve: %s", e)
        finally:
            self._current_interaction_data = None
            self._interaction_start_time_monotonic = None

    def _litellm_success_callback(self, kwargs, completion_response, start_time, end_time) -> None:
        """
        Callback for successful LiteLLM calls.

        Records the model, token usage and estimated cost of the call on the
        current interaction. Does nothing when no interaction is in progress.

        Args:
            kwargs: Keyword arguments passed to the LiteLLM completion call.
            completion_response: The response object from LiteLLM.
            start_time: Timestamp when the LLM call started.
            end_time: Timestamp when the LLM call ended (unused).
        """
        if not self.enabled or not self._current_interaction_data:
            return

        usage = getattr(completion_response, "usage", None)
        # `or 0` also covers usage objects that carry the attribute set to None.
        prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
        completion_tokens = getattr(usage, "completion_tokens", 0) or 0

        cost = 0.0
        try:
            calculated_cost = litellm.completion_cost(completion_response=completion_response)
            if calculated_cost is not None:
                cost = float(calculated_cost)
        except Exception as e:  # Cost lookup is best-effort; never fail the callback.
            self._log_event(
                logging.WARNING,
                "Analytics: Could not calculate cost for LLM call. Error: %s",
                e,
            )
            cost = 0.0

        choices = getattr(completion_response, "choices", None)
        finish_reason = getattr(choices[0], "finish_reason", None) if choices else None

        # start_time is expected to be a datetime; fall back to str() otherwise
        # so an unexpected type does not drop the whole call record.
        if hasattr(start_time, "isoformat"):
            timestamp = start_time.isoformat()
        else:
            timestamp = str(start_time)

        self._current_interaction_data["llm_calls_details"].append(
            {
                "model": kwargs.get("model", "unknown_model"),
                "id": getattr(completion_response, "id", None),
                "finish_reason": finish_reason,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "cost": cost,
                "timestamp": timestamp,
            }
        )

        totals = self._current_interaction_data["token_summary"]
        totals["prompt_tokens"] += prompt_tokens
        totals["completion_tokens"] += completion_tokens
        totals["total_tokens"] += prompt_tokens + completion_tokens
        totals["estimated_cost"] += cost

    def log_commit(self, commit_hash, commit_message) -> None:
        """
        Logs a git commit made during the current interaction.

        Args:
            commit_hash (str): The hash of the commit.
            commit_message (str): The commit message.
        """
        if not self.enabled or not self._current_interaction_data:
            return
        self._current_interaction_data["commits_made_this_interaction"].append(
            {"hash": commit_hash, "message": commit_message}
        )

    def _unregister_atexit_handler(self) -> None:
        """Remove end_session from the atexit registry, ignoring a missing registration."""
        try:
            atexit.unregister(self.end_session)
        except TypeError:  # pragma: no cover
            pass  # Handler was not registered or other issue

    def _export_session_jsonl(self) -> bool:
        """Write all stored interactions to the session JSONL file; return True on success."""
        try:
            with shelve.open(self.data_file, "r") as db:
                interactions = db.get("interactions", [])
            # Serialize before opening the output so a failure cannot leave a
            # truncated export behind. default=str is a deliberate best-effort
            # fallback for values json cannot encode natively.
            lines = [json.dumps(item, default=str) + "\n" for item in interactions]
            with open(self.session_jsonl_file, "w", encoding="utf-8") as f:
                f.writelines(lines)
        except Exception as e:
            self._log_event(logging.ERROR, "Error writing shelve data to JSONL: %s", e)
            return False
        self._log_event(logging.INFO, "Shelve data written to %s", self.session_jsonl_file)
        return True

    def _generate_dashboard(self) -> None:
        """Run the dashboard generator, logging (not raising) any failure."""
        try:
            main()
        except Exception as e:
            self._log_event(logging.ERROR, "Error generating dashboard: %s", e)

    def _remove_litellm_callback(self) -> None:
        """Remove this collector's success callback from LiteLLM, leaving all others in place."""
        callbacks = getattr(litellm, "success_callback", None)
        if not isinstance(callbacks, list):
            return
        own_callback = self._litellm_success_callback
        # Mutate in place so other holders of the list object see the change.
        callbacks[:] = [cb for cb in callbacks if cb != own_callback]

    def end_session(self) -> None:
        """
        Ends the analytics collection session.

        Ends any ongoing interaction, exports the stored interactions to the
        session JSONL file, runs the dashboard generator (only if the export
        succeeded), unregisters the atexit handler, removes this collector's
        LiteLLM callback and closes the log handlers. Afterwards the
        collector is disabled, so a second call does nothing.
        """
        if not self.enabled:
            # Analytics was never enabled or the session already ended.
            self._unregister_atexit_handler()
            return

        # Must happen while still enabled: end_interaction() is a no-op otherwise.
        if self._current_interaction_data:
            self.end_interaction()

        if hasattr(self, "data_file") and hasattr(self, "session_jsonl_file"):
            if self._export_session_jsonl():
                self._generate_dashboard()

        self._unregister_atexit_handler()
        self._remove_litellm_callback()

        self._log_event(logging.INFO, "LocalAnalyticsCollector session ended.")

        # Close handlers to release the log file, especially on Windows.
        logger = getattr(self, "logger", None)
        if logger is not None:
            for handler in logger.handlers[:]:
                handler.close()
                logger.removeHandler(handler)

        # Prevent re-entry or further use.
        self.enabled = False