mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-30 01:04:59 +00:00

feat: Initialize LocalAnalyticsCollector in main.py feat: Display session data in local analytics dashboard fix: Use cumulative data from last interaction for dashboard stats fix: Extract initial query from diffs in local analytics collector.
97 lines
3.7 KiB
Python
97 lines
3.7 KiB
Python
import json
|
|
from pathlib import Path
|
|
import re
|
|
|
|
# The session log is expected to sit in the same directory as this script.
BASE_DIR = Path(__file__).resolve().parent
SESSION_DATA_FILE = BASE_DIR.joinpath("session.jsonl")
|
|
|
|
# Prefixes that mark a line as the start of a fenced code block or of a
# unified diff. The pattern is anchored, so it is meant to be applied with
# ``match`` to a single line (the candidate query line).
CODE_DIFF_MARKERS_REGEX = re.compile(r"^(```|diff --git|--- |\+\+\+ |@@ )")


def clean_query(query_text):
    """Reduce a recorded query to its first meaningful line.

    Everything from the first "```diff" fence onwards is discarded, and the
    first non-blank line of what remains is returned. If that line is itself
    a code/diff marker (see ``CODE_DIFF_MARKERS_REGEX``), the query is
    treated as being code rather than a question and "" is returned.

    Args:
        query_text: The raw query value taken from a session record.

    Returns:
        * ``query_text`` unchanged when it is not a string, or is an empty or
          whitespace-only string;
        * "" when nothing but whitespace precedes the "```diff" fence, or
          when the first non-blank line is a code/diff marker;
        * otherwise the first non-blank line, with its own leading and
          trailing spaces preserved.
    """
    if not isinstance(query_text, str) or not query_text.strip():
        # Non-strings and blank strings are passed through untouched.
        return query_text

    # The separator is a literal, so str.partition is sufficient; it also
    # avoids passing ``maxsplit`` positionally to re.split, which is
    # deprecated since Python 3.13.
    text_before_diff = query_text.partition("```diff")[0]

    # Skip leading blank lines: a query that merely starts with a newline
    # must not be collapsed to an empty string.
    first_line = next(
        (line for line in text_before_diff.splitlines() if line.strip()),
        "",
    )

    if not first_line or CODE_DIFF_MARKERS_REGEX.match(first_line):
        return ""
    return first_line
|
|
|
|
def main():
    """Clean the "query" field of every record in session.jsonl.

    Each line of ``SESSION_DATA_FILE`` is parsed as JSON. When the record
    has a string "query", it is passed through ``clean_query`` and the
    record is re-serialized. Lines that cannot be parsed or processed are
    kept exactly as they were read, and a warning is printed for each.

    The cleaned content is first written to a sibling ".tmp" file and then
    moved over the session file, so a failed write cannot leave the session
    log truncated. Because the file is replaced rather than rewritten, it is
    created fresh each run.

    Progress, warnings and errors are reported with ``print``; nothing is
    returned.
    """
    if not SESSION_DATA_FILE.exists():
        print(f"Error: Session data file not found at {SESSION_DATA_FILE}")
        return

    updated_lines = []
    modified_count = 0
    processed_lines = 0

    print(f"Starting cleaning process for {SESSION_DATA_FILE}...")

    with open(SESSION_DATA_FILE, "r", encoding="utf-8") as f:
        for line_num, line_content in enumerate(f, 1):
            processed_lines += 1
            try:
                data = json.loads(line_content)
                # A missing key yields None, which fails the isinstance
                # check, so no separate membership test is needed.
                original_query = data.get("query")

                if isinstance(original_query, str):
                    cleaned_query = clean_query(original_query)
                    if cleaned_query != original_query:
                        data["query"] = cleaned_query
                        modified_count += 1

                updated_lines.append(json.dumps(data) + "\n")

            except json.JSONDecodeError as e:
                print(f"Warning: Error decoding JSON from line {line_num}: {e}. Keeping original line.")
                updated_lines.append(line_content)  # Keep original line if JSON error
            except Exception as e:
                # Deliberately broad: e.g. a valid JSON value that is not an
                # object has no .get(). Such lines are preserved, not dropped.
                print(f"Warning: Error processing line {line_num}: {e}. Keeping original line.")
                updated_lines.append(line_content)  # Keep original line if other error

    # Write to a temporary sibling first, then swap it into place. Resolving
    # the target means a symlinked session file is updated at its real path.
    target_path = SESSION_DATA_FILE.resolve()
    tmp_path = target_path.with_name(target_path.name + ".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.writelines(updated_lines)
        tmp_path.replace(target_path)
    except OSError as e:
        print(f"Error writing cleaned data to {SESSION_DATA_FILE}: {e}")
        # Best-effort cleanup of the partial temp file; the original session
        # file has not been touched at this point.
        try:
            tmp_path.unlink()
        except OSError:
            pass
    else:
        print("\nProcessing complete.")
        print(f"Processed {processed_lines} lines.")
        print(f"{modified_count} queries were cleaned.")
        print(f"Cleaned data saved to {SESSION_DATA_FILE.resolve()}")


if __name__ == "__main__":
    main()
|