# aider/tests/local_analytics/test_local_analytics_collector_standalone.py

import os
import shelve
import json
import tempfile
import shutil
import unittest
from unittest.mock import patch, MagicMock
import datetime
import time
import logging # Import logging for logger checks
# Assuming the script is run from the project root or PYTHONPATH is set
# This import path assumes the test script is in tests/local_analytics/
try:
from local_analytics.local_analytics_collector import LocalAnalyticsCollector
except ImportError:
# Fallback import path if the script is run from a different location
# This might require adjusting PYTHONPATH or running from the project root
print("Could not import LocalAnalyticsCollector directly. Ensure PYTHONPATH is set or run from project root.")
print("Attempting import assuming script is in tests/local_analytics/")
try:
        # Adjust sys.path so the package resolves from other execution contexts
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from local_analytics.local_analytics_collector import LocalAnalyticsCollector
sys.path.pop(0) # Clean up sys.path
except ImportError as e:
print(f"Failed to import LocalAnalyticsCollector even with path adjustment: {e}")
        # Re-raise so the test run fails loudly when the import cannot be resolved
raise
# Minimal stand-in for aider's InputOutput, just enough to satisfy the collector's __init__
class DummyIO:
def tool_output(self, *args, **kwargs):
pass
def tool_warning(self, *args, **kwargs):
pass
def tool_error(self, *args, **kwargs):
pass
def confirm_ask(self, *args, **kwargs):
return 'y' # Default to yes for confirmations
def print(self, *args, **kwargs):
pass
def append_chat_history(self, *args, **kwargs):
pass
class TestLocalAnalyticsCollectorStandalone(unittest.TestCase):
def setUp(self):
# Create a temporary directory for test files
self.temp_dir = tempfile.mkdtemp()
self.project_name = os.path.basename(self.temp_dir)
# Define file paths relative to the temporary project root
self.analytics_dir = os.path.join(self.temp_dir, "local_analytics")
self.data_file = os.path.join(self.analytics_dir, "aider_analytics_data.shelve")
self.session_jsonl_file = os.path.join(self.analytics_dir, "session.jsonl")
self.dashboard_output_file = os.path.join(self.analytics_dir, "dashboard.html")
self.log_file = os.path.join(self.analytics_dir, "local_analytics_collector.logs")
# Ensure the local_analytics directory exists within the temp dir
os.makedirs(self.analytics_dir, exist_ok=True)
# Mock the generate_dashboard function
# Patch the function where it's *used* in local_analytics_collector.py
self.patcher_generate_dashboard = patch('local_analytics.local_analytics_collector.generate_dashboard')
self.mock_generate_dashboard = self.patcher_generate_dashboard.start()
# Mock litellm.completion_cost as it might be called internally
self.patcher_litellm_cost = patch('litellm.completion_cost')
self.mock_litellm_cost = self.patcher_litellm_cost.start()
self.mock_litellm_cost.return_value = 0.03 # Return a fixed cost for testing
# Mock litellm.success_callback list to control it during the test
# The collector appends its callback to this list in __init__
self.patcher_litellm_success_callback_list = patch('litellm.success_callback', new_callable=list)
self.mock_litellm_success_callback_list = self.patcher_litellm_success_callback_list.start()
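        # patch(..., new_callable=list) swaps litellm.success_callback for a fresh empty
        # list, so the test can assert that __init__ registered the collector's callback
        # without leaking callbacks into other tests.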
# Create a dummy IO object
self.dummy_io = DummyIO()
def tearDown(self):
# Stop all patches
self.patcher_generate_dashboard.stop()
self.patcher_litellm_cost.stop()
self.patcher_litellm_success_callback_list.stop()
# Clean up the temporary directory
if os.path.exists(self.temp_dir):
shutil.rmtree(self.temp_dir)
def test_analytics_collection_and_output(self):
"""
Tests that analytics data is collected, saved to shelve,
written to session.jsonl, and the dashboard generator is called
with the correct shelve file path.
"""
# 1. Initialize collector
# Pass the temporary directory as the git_root
collector = LocalAnalyticsCollector(self.dummy_io, git_root=self.temp_dir, enabled=True)
# Verify the collector's callback was added to the litellm list
self.assertIn(collector._litellm_success_callback, self.mock_litellm_success_callback_list)
# 2. Simulate an interaction
query = "Test query for analytics collection"
modified_files = ["test_file1.py", "docs/test_doc.md"]
collector.start_interaction(query, modified_files_in_chat=modified_files)
# Simulate an LLM call within the interaction
mock_completion_response = MagicMock()
mock_completion_response.usage.prompt_tokens = 100
mock_completion_response.usage.completion_tokens = 200
mock_completion_response.id = "chatcmpl-test-id-12345"
mock_completion_response.choices = [MagicMock()]
mock_completion_response.choices[0].finish_reason = "stop"
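        # The mock only needs the attributes the collector is expected to read from a
        # litellm completion response: token usage, the response id, and the finish reason.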
llm_call_kwargs = {"model": "gpt-4o", "messages": [{"role": "user", "content": "..."}]}
start_time = datetime.datetime.now()
# Simulate some duration
time.sleep(0.01)
end_time = datetime.datetime.now()
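        # The positional signature (kwargs, completion_response, start_time, end_time)
        # mirrors litellm's custom success-callback convention, which the collector is
        # assumed to follow when it registers itself in litellm.success_callback.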
# Manually call the internal success callback to simulate a completed LLM call
collector._litellm_success_callback(llm_call_kwargs, mock_completion_response, start_time, end_time)
# Simulate a commit
commit_hash = "abcdef1234567890"
commit_message = "feat: added test analytics data"
collector.log_commit(commit_hash, commit_message)
# End the interaction
collector.end_interaction()
# 3. End the session (triggers saving to shelve and writing to jsonl)
collector.end_session()
# 4. Assertions
# Check if shelve file exists and contains data
        # Depending on the dbm backend, shelve may create one or more backing files
        # (e.g. .db or .dat/.dir/.bak), so match on the base name
self.assertTrue(any(f.startswith(os.path.basename(self.data_file)) for f in os.listdir(self.analytics_dir)),
"Shelve data files should exist")
try:
# Use the base path for shelve.open
with shelve.open(self.data_file, 'r') as db:
self.assertIn("interactions", db, "Shelve should contain 'interactions' key")
interactions = db["interactions"]
self.assertIsInstance(interactions, list, "'interactions' in shelve should be a list")
self.assertEqual(len(interactions), 1, "Shelve should contain exactly one interaction")
interaction_data = interactions[0]
self.assertEqual(interaction_data.get("query"), query)
self.assertEqual(interaction_data.get("modified_files_in_chat"), modified_files)
self.assertGreater(interaction_data.get("interaction_duration_seconds", 0), 0)
self.assertIn("llm_calls_details", interaction_data)
self.assertEqual(len(interaction_data["llm_calls_details"]), 1)
llm_call_detail = interaction_data["llm_calls_details"][0]
self.assertEqual(llm_call_detail.get("model"), "gpt-4o")
self.assertEqual(llm_call_detail.get("prompt_tokens"), 100)
self.assertEqual(llm_call_detail.get("completion_tokens"), 200)
self.assertEqual(llm_call_detail.get("cost"), 0.03)
# Check timestamp format (isoformat)
self.assertIsInstance(llm_call_detail.get("timestamp"), str)
try:
datetime.datetime.fromisoformat(llm_call_detail["timestamp"])
except ValueError:
self.fail("LLM call timestamp is not in ISO format")
self.assertIn("commits_made_this_interaction", interaction_data)
self.assertEqual(len(interaction_data["commits_made_this_interaction"]), 1)
self.assertEqual(interaction_data["commits_made_this_interaction"][0].get("hash"), commit_hash)
self.assertEqual(interaction_data["commits_made_this_interaction"][0].get("message"), commit_message)
# Check token summary
token_summary = interaction_data.get("token_summary", {})
self.assertEqual(token_summary.get("prompt_tokens"), 100)
self.assertEqual(token_summary.get("completion_tokens"), 200)
self.assertEqual(token_summary.get("total_tokens"), 300)
self.assertEqual(token_summary.get("estimated_cost"), 0.03)
except Exception as e:
self.fail(f"Error reading shelve file: {e}")
# Check if session.jsonl file exists and contains data
self.assertTrue(os.path.exists(self.session_jsonl_file), "session.jsonl file should exist")
try:
with open(self.session_jsonl_file, 'r', encoding='utf-8') as f:
lines = f.readlines()
self.assertEqual(len(lines), 1, "session.jsonl should contain exactly one line")
json_data = json.loads(lines[0])
# Verify content matches the interaction data saved in shelve
                # Note: JSON round-tripping may change types slightly (e.g. datetime becomes a string).
                # The full structure was already verified against the shelve data above, so only
                # spot-check key values here.
self.assertIsInstance(json_data, dict)
self.assertEqual(json_data.get("query"), query)
self.assertEqual(json_data.get("modified_files_in_chat"), modified_files)
self.assertIn("llm_calls_details", json_data)
self.assertEqual(len(json_data["llm_calls_details"]), 1)
self.assertIn("commits_made_this_interaction", json_data)
self.assertEqual(len(json_data["commits_made_this_interaction"]), 1)
self.assertEqual(json_data.get("token_summary", {}).get("total_tokens"), 300)
except Exception as e:
self.fail(f"Error reading or parsing session.jsonl: {e}")
# Check if generate_dashboard was called with correct arguments
self.mock_generate_dashboard.assert_called_once()
# Check arguments: project_name, shelve_file_path, dashboard_output_path, logger
called_args, called_kwargs = self.mock_generate_dashboard.call_args
self.assertEqual(called_args[0], self.project_name)
self.assertEqual(called_args[1], self.data_file) # Verify shelve file path is passed
self.assertEqual(called_args[2], self.dashboard_output_file)
# Optionally check the logger argument type
self.assertIsInstance(called_args[3], logging.LoggerAdapter)
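        # The collector is assumed to hand generate_dashboard a LoggerAdapter; relax this
        # to logging.Logger if that implementation detail changes.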
# This allows running the test directly from the command line
if __name__ == '__main__':
    # Configure basic logging so collector log output is visible when the test is run directly
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
unittest.main(argv=['first-arg-is-ignored'], exit=False)