mirror of
https://github.com/Aider-AI/aider.git
synced 2025-06-01 10:14:59 +00:00
feat: Local Analytics Dashboard for Aider
feat: Initialize LocalAnalyticsCollector in main.py
feat: Display session data in local analytics dashboard
fix: Use cumulative data from last interaction for dashboard stats
fix: Extract initial query from diffs in local analytics collector
This commit is contained in:
parent
4e0964046a
commit
e8bee42d76
12 changed files with 1732 additions and 12 deletions
97
local_analytics/clean_query_field.py
Normal file
97
local_analytics/clean_query_field.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
# Both paths are anchored to the directory that contains this script, so the
# tool works regardless of the current working directory.
BASE_DIR = Path(__file__).resolve().parent
SESSION_DATA_FILE = BASE_DIR.joinpath("session.jsonl")
|
||||
|
||||
# Matches a line that opens a Markdown code fence or is a unified-diff
# header/hunk line. clean_query applies it to the candidate line with leading
# whitespace removed, so indented markers are recognised as well.
CODE_DIFF_MARKERS_REGEX = re.compile(
    r"^(```|diff --git|--- |\+\+\+ |@@ )"
)


def clean_query(query_text):
    """Reduce a recorded query to its first meaningful line.

    Everything from the first "```diff" fence onwards is discarded, then the
    first non-blank line of what remains is taken as the query.

    Args:
        query_text: The raw ``query`` value from a session record.

    Returns:
        * ``query_text`` unchanged when it is not a string, or is an empty or
          whitespace-only string.
        * ``""`` when nothing but whitespace precedes the "```diff" fence, or
          when the first non-blank line is itself a code/diff marker line
          (code fence, ``diff --git``, ``--- ``, ``+++ `` or ``@@ ``).
        * Otherwise the first non-blank line, with its own whitespace intact.
    """
    if not isinstance(query_text, str) or not query_text.strip():
        return query_text

    # The pattern is a plain literal, so str.split is equivalent to re.split.
    text_before_diff = query_text.split("```diff", 1)[0]

    # Skip leading blank lines: a query that merely starts with a newline
    # must not be wiped out just because its literal first line is empty.
    first_line = next(
        (line for line in text_before_diff.splitlines() if line.strip()),
        "",
    )
    if not first_line:
        return ""

    # Test the marker against the left-stripped line so that an indented
    # fence or diff header is still treated as code rather than as a query.
    if CODE_DIFF_MARKERS_REGEX.match(first_line.lstrip()):
        return ""

    return first_line
|
||||
|
||||
def main():
    """Clean the ``query`` field of every record in session.jsonl.

    Each line of SESSION_DATA_FILE is parsed as JSON. When the record has a
    string ``query`` value, it is replaced by ``clean_query(query)``. Every
    successfully parsed line is re-serialised with ``json.dumps``; a line that
    cannot be parsed or processed is kept verbatim and a warning is printed.

    The result is written to a temporary file next to the original and then
    renamed over it, so a failure while writing cannot leave a truncated
    session file behind.

    Returns:
        None. Progress, warnings and errors are reported on stdout.
    """
    if not SESSION_DATA_FILE.exists():
        print(f"Error: Session data file not found at {SESSION_DATA_FILE}")
        return

    updated_lines = []
    modified_count = 0
    processed_lines = 0

    print(f"Starting cleaning process for {SESSION_DATA_FILE}...")

    with open(SESSION_DATA_FILE, "r", encoding="utf-8") as f:
        for line_num, line_content in enumerate(f, 1):
            processed_lines += 1
            try:
                data = json.loads(line_content)
                original_query = data.get("query")

                if isinstance(original_query, str):
                    cleaned_query = clean_query(original_query)
                    if cleaned_query != original_query:
                        data["query"] = cleaned_query
                        modified_count += 1

                updated_lines.append(json.dumps(data) + "\n")

            except json.JSONDecodeError as e:
                print(f"Warning: Error decoding JSON from line {line_num}: {e}. Keeping original line.")
                updated_lines.append(line_content)  # Keep original line if JSON error
            except Exception as e:
                # Deliberately broad: this is a best-effort cleanup, so any
                # unexpected record (e.g. valid JSON that is not an object and
                # therefore has no .get) is preserved untouched, not fatal.
                print(f"Warning: Error processing line {line_num}: {e}. Keeping original line.")
                updated_lines.append(line_content)  # Keep original line if other error

    # Write to a sibling temp file first and swap it in only once it is
    # complete; opening the original with "w" would truncate it immediately
    # and lose the data if anything failed mid-write.
    temp_file = SESSION_DATA_FILE.with_name(SESSION_DATA_FILE.name + ".tmp")
    try:
        with open(temp_file, "w", encoding="utf-8") as f:
            f.writelines(updated_lines)
        temp_file.replace(SESSION_DATA_FILE)
    except OSError as e:
        print(f"Error writing cleaned data to {SESSION_DATA_FILE}: {e}")
        try:
            temp_file.unlink()
        except OSError:
            # Best-effort cleanup only; the temp file may never have been
            # created, and the original session file is still intact.
            pass
        return

    print("\nProcessing complete.")
    print(f"Processed {processed_lines} lines.")
    print(f"{modified_count} queries were cleaned.")
    print(f"Cleaned data saved to {SESSION_DATA_FILE.resolve()}")
|
||||
|
||||
|
||||
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue