"""
Unit tests for app.py - focusing on Step 4: Summary Generation with Error Statistics
"""

import json
from unittest import mock
from unittest.mock import MagicMock

from app import _extract_error_statistics
from src.utils.error_tracking import ErrorTracker


class TestExtractErrorStatistics:
    """Exercise _extract_error_statistics with metrics dicts and step results."""

    @staticmethod
    def _tracker_with(*failures):
        """Return a fresh ErrorTracker after replaying each failure onto it.

        Every item in ``failures`` is a dict of keyword arguments that is
        forwarded unchanged to ``ErrorTracker.record_error``, in the order
        given. Calling this with no arguments yields an untouched tracker.
        """
        tracker = ErrorTracker()
        for failure in failures:
            tracker.record_error(**failure)
        return tracker

    def setup_method(self):
        """Replace ``app.logger`` with a mock before every test."""
        # Patching the logger keeps the tests free of logging setup.
        patcher = mock.patch("app.logger")
        self.logger_patch = patcher
        self.mock_logger = patcher.start()

    def teardown_method(self):
        """Undo the ``app.logger`` patch installed by setup_method."""
        self.logger_patch.stop()

    def test_extract_from_metrics_complete_stats(self):
        """Metrics carrying every error key are reflected in the result."""
        sample_error = {
            "timestamp": 1672531200.0,
            "error_type": "ValueError",
            "error_message": "Test error",
            "task_id": "task_1",
            "retry_attempts": 3,
            "input_preview": "Test question",
        }
        metrics = {
            "duration": 1.23,
            "accuracy": 0.85,
            "failed_llm_calls": 3,
            "error_types": {"ValueError": 2, "RuntimeError": 1},
            "errors": [sample_error],
        }
        results = {"step1": 5, "step2": 10}

        error_stats = _extract_error_statistics(results, metrics)

        # The error-related entries must come through...
        assert error_stats["failed_llm_calls"] == 3
        type_counts = error_stats["error_types"]
        assert type_counts["ValueError"] == 2
        assert type_counts["RuntimeError"] == 1
        recorded = error_stats["errors"]
        assert len(recorded) == 1
        assert recorded[0]["error_type"] == "ValueError"

        # ...while the unrelated metrics must be left out.
        for unrelated in ("duration", "accuracy"):
            assert unrelated not in error_stats

    def test_extract_from_metrics_partial_stats(self):
        """Metrics carrying only some error keys yield only those keys."""
        # These metrics hold no "average_retry_attempts" or "sample_errors"
        # entry (and no "errors" entry either).
        metrics = {
            "duration": 1.23,
            "failed_llm_calls": 2,
            "error_types": {"ValueError": 2},
        }
        results = {"step1": 5}

        error_stats = _extract_error_statistics(results, metrics)

        # Whatever was supplied is extracted.
        assert error_stats["failed_llm_calls"] == 2
        assert error_stats["error_types"]["ValueError"] == 2
        # Keys that were never supplied must not be invented.
        for absent in ("average_retry_attempts", "sample_errors"):
            assert absent not in error_stats

    def test_extract_from_error_tracker_in_results(self):
        """An ErrorTracker stored in a step result supplies the statistics."""
        shared_model = {"name": "test-model", "version": "1.0"}
        error_tracker = self._tracker_with(
            {
                "error": ValueError("Test error 1"),
                "input_messages": [{"role": "user", "content": "Test question 1"}],
                "model_config": dict(shared_model),
                "retry_attempts": 2,
                "task_id": "task_1",
            },
            {
                "error": RuntimeError("Test error 2"),
                "input_messages": [{"role": "user", "content": "Test question 2"}],
                "model_config": dict(shared_model),
                "retry_attempts": 3,
                "task_id": "task_2",
            },
        )

        # Three-element tuple: stand-in DataFrame, stand-in score, tracker.
        # NOTE(review): the original notes call this the QATask.run() return
        # shape - not verifiable from this file.
        task_outcome = (MagicMock(), 0.85, error_tracker)
        results = {"step1": 5, "run_task": task_outcome}
        metrics = {"duration": 1.23, "accuracy": 0.85}

        error_stats = _extract_error_statistics(results, metrics)

        # Both recorded failures must be visible in the extracted statistics.
        assert error_stats["failed_llm_calls"] == 2
        type_counts = error_stats["error_types"]
        assert type_counts["ValueError"] == 1
        assert type_counts["RuntimeError"] == 1
        assert len(error_stats["errors"]) == 2

    def test_extract_from_empty_error_tracker(self):
        """A tracker with nothing recorded produces an empty result."""
        untouched_tracker = self._tracker_with()
        results = {"run_task": (MagicMock(), 0.95, untouched_tracker)}
        metrics = {"duration": 1.23}

        error_stats = _extract_error_statistics(results, metrics)

        # Nothing was recorded, so nothing may be reported.
        assert error_stats == {}

    def test_extract_no_error_information(self):
        """Neither metrics nor results mention errors: the result is empty."""
        metrics = {"duration": 1.23, "accuracy": 0.95}
        results = {"step1": 5, "step2": 10}

        error_stats = _extract_error_statistics(results, metrics)

        # No error source at all means an empty dict.
        assert error_stats == {}

    def test_extract_metrics_priority_over_tracker(self):
        """When both sources exist, the numbers in metrics are the ones used."""
        error_tracker = self._tracker_with(
            {
                "error": ValueError("Tracker error"),
                "input_messages": [{"role": "user", "content": "Tracker question"}],
                "model_config": {"name": "tracker-model"},
                "retry_attempts": 1,
                "task_id": "tracker_task",
            },
        )
        results = {"run_task": (MagicMock(), 0.85, error_tracker)}

        # Deliberately disagrees with what the tracker recorded.
        metrics = {
            "failed_llm_calls": 5,
            "error_types": {"RuntimeError": 5},
        }

        error_stats = _extract_error_statistics(results, metrics)

        # The values must match metrics; the tracker's ValueError must not leak in.
        assert error_stats["failed_llm_calls"] == 5
        type_counts = error_stats["error_types"]
        assert type_counts["RuntimeError"] == 5
        assert "ValueError" not in type_counts

    def test_extract_multiple_steps_with_trackers(self):
        """With trackers in several steps, the first step's tracker is reported."""
        tracker1 = self._tracker_with(
            {
                "error": ValueError("Error 1"),
                "input_messages": [{"role": "user", "content": "Question 1"}],
                "model_config": {"name": "model1"},
                "retry_attempts": 1,
                "task_id": "task_1",
            },
        )
        tracker2 = self._tracker_with(
            {
                "error": RuntimeError("Error 2"),
                "input_messages": [{"role": "user", "content": "Question 2"}],
                "model_config": {"name": "model2"},
                "retry_attempts": 2,
                "task_id": "task_2",
            },
            {
                "error": TypeError("Error 3"),
                "input_messages": [{"role": "user", "content": "Question 3"}],
                "model_config": {"name": "model2"},
                "retry_attempts": 1,
                "task_id": "task_3",
            },
        )

        # Insertion order matters: "step1" (tracker1) precedes "step2".
        results = {
            "step1": (MagicMock(), 0.8, tracker1),
            "step2": (MagicMock(), 0.9, tracker2),
        }
        metrics = {}

        error_stats = _extract_error_statistics(results, metrics)

        # Expect tracker1's single ValueError, not tracker2's two errors.
        assert error_stats["failed_llm_calls"] == 1
        assert error_stats["error_types"]["ValueError"] == 1

    def test_extract_invalid_result_structures(self):
        """Step results that hold no ErrorTracker are tolerated and ignored."""
        results = {
            # Not a tuple at all.
            "step1": "not_a_tuple",
            # Tuple with only two elements.
            "step2": (1, 2),
            # Three elements, but the third is a string rather than a tracker.
            "step3": (1, 2, "not_an_error_tracker"),
            # Five elements; the third is the int 3, not a tracker either.
            "step4": (1, 2, 3, 4, 5),
        }
        metrics = {}

        error_stats = _extract_error_statistics(results, metrics)

        # None of these shapes may raise or contribute statistics.
        assert error_stats == {}

    def test_json_serialization_of_extracted_stats(self):
        """Tracker-derived statistics survive a JSON dump/load round trip."""
        error_tracker = self._tracker_with(
            {
                "error": ValueError("Serialization test"),
                "input_messages": [{"role": "user", "content": "Serialize this"}],
                "model_config": {"name": "test-model"},
                "retry_attempts": 1,
                "task_id": "serialize_task",
            },
        )
        results = {"task": (MagicMock(), 0.9, error_tracker)}
        metrics = {}

        error_stats = _extract_error_statistics(results, metrics)

        # default=str stringifies any value json cannot encode natively.
        encoded = json.dumps(error_stats, default=str)
        loaded_stats = json.loads(encoded)

        # The counts and list length must be intact after the round trip.
        assert loaded_stats["failed_llm_calls"] == 1
        assert loaded_stats["error_types"]["ValueError"] == 1
        assert len(loaded_stats["errors"]) == 1

    def test_logging_behavior(self):
        """Both extraction paths complete while logging is in play.

        No logger calls are asserted here; the test only checks that each
        path returns the expected failure count.
        NOTE(review): the original notes say logging goes through a
        @with_logger decorator that builds its own logger - confirm in app.py.
        """
        # Path 1: statistics supplied through metrics.
        from_metrics = _extract_error_statistics({}, {"failed_llm_calls": 2})
        assert from_metrics["failed_llm_calls"] == 2

        # Path 2: statistics supplied through a tracker (no task_id given).
        error_tracker = self._tracker_with(
            {
                "error": ValueError("Test"),
                "input_messages": [],
                "model_config": {},
                "retry_attempts": 0,
            },
        )
        from_tracker = _extract_error_statistics(
            {"task": (None, 0, error_tracker)}, {}
        )
        assert from_tracker["failed_llm_calls"] == 1


class TestExperimentSummaryIntegration:
    """Check how an error-statistics dict fits into an experiment summary dict.

    NOTE(review): both tests assemble the summary by hand and call no app.py
    code; they only pin the dict layout and its JSON round trip.
    """

    def test_summary_structure_with_error_stats(self):
        """A non-empty statistics dict is attached and survives JSON encoding."""
        timeout_failure = {
            "timestamp": 1672531200.0,
            "error_type": "ValueError",
            "error_message": "Connection timeout",
            "task_id": "task_1",
            "retry_attempts": 3,
            "input_preview": "What is the capital of France?",
        }
        overload_failure = {
            "timestamp": 1672531260.0,
            "error_type": "RuntimeError",
            "error_message": "Model overloaded",
            "task_id": "task_2",
            "retry_attempts": 2,
            "input_preview": "Explain quantum computing in simple terms...",
        }

        # Hand-written stand-in for the statistics dict.
        error_stats = {
            "failed_llm_calls": 3,
            "error_types": {"ValueError": 2, "RuntimeError": 1},
            "average_retry_attempts": 2.5,
            "sample_errors": [timeout_failure, overload_failure],
        }

        # Hand-written stand-in for the experiment summary.
        base_summary = {
            "experiment_name": "test_experiment",
            "duration": 123.45,
            "timestamp": "2025-01-01 12:00:00",
            "config_file": "test_config.yaml",
            "results": {"step1": 5, "run_task": "mock_task_results"},
        }

        # Attach only when there is something to attach.
        # NOTE(review): the original notes say app.py does the same - confirm.
        if error_stats:
            base_summary["error_statistics"] = error_stats

        # The in-memory structure carries the statistics.
        assert "error_statistics" in base_summary
        attached = base_summary["error_statistics"]
        assert attached["failed_llm_calls"] == 3
        assert len(attached["sample_errors"]) == 2

        # Encoding must succeed, since the summary is meant to be saved as JSON.
        loaded_summary = json.loads(json.dumps(base_summary, indent=2, default=str))

        # The decoded copy must still match.
        assert loaded_summary["experiment_name"] == "test_experiment"
        restored = loaded_summary["error_statistics"]
        assert restored["failed_llm_calls"] == 3
        assert restored["error_types"]["ValueError"] == 2

    def test_summary_structure_without_error_stats(self):
        """An empty statistics dict leaves the summary without the extra key."""
        # An empty dict stands for "no errors to report".
        error_stats = {}

        base_summary = {
            "experiment_name": "successful_experiment",
            "duration": 45.67,
            "timestamp": "2025-01-01 12:00:00",
            "config_file": "success_config.yaml",
            "results": {"step1": 10},
        }

        # Same conditional attach as above; it is skipped for an empty dict.
        if error_stats:
            base_summary["error_statistics"] = error_stats

        # No key was added and the rest of the summary is untouched.
        assert "error_statistics" not in base_summary
        assert base_summary["experiment_name"] == "successful_experiment"

        # The summary must still encode and decode cleanly.
        loaded_summary = json.loads(json.dumps(base_summary, indent=2, default=str))
        assert loaded_summary["experiment_name"] == "successful_experiment"
