"""Integration tests for evaluation-only mode functionality."""

import pytest
import pandas as pd
import tempfile
import shutil
from pathlib import Path
from unittest.mock import patch, MagicMock

from src.main import run_evaluation_only_mode
from src.data_models.evaluation_result import EvaluationResult


class TestEvaluationOnlyIntegration:
    """Integration tests for evaluation-only mode."""
    
    def setup_method(self):
        """Set up test fixtures."""
        self.temp_dir = tempfile.mkdtemp()
        self.results_dir = Path(self.temp_dir) / "results"
        self.results_dir.mkdir()
        self.results_csv_path = self.results_dir / "results.csv"
        
        # Create a minimal CSV with one row
        self.sample_data = {
            'datetime': ['2025-01-01T10:00:00'],
            'algorithm_name': ['test_algorithm'],
            'task_name': ['bridge'],
            'solution': ['Test solution for integration test'],
            'feasibility_score': [0.8],
            'utility_score': [0.7],
            'novelty_score': [0.6],
            'creativity_score': [0.7],
            'original_solution_id': ['sol_1'],
            'intermediate_log_filename': ['intermediate_logs/eval_12345678-1234-1234-1234-123456789abc_sol_1.json'],
            'backbone_llm_name': ['gpt-4o'],
            'feasibility_reasoning': ['Old feasibility reasoning'],
            'utility_reasoning': ['Old utility reasoning'],
            'novelty_theme': ['Old novelty theme'],
            'num_analogous_problems': [10],
            'num_solutions_per_problem': [5],
            'num_exploratory_ideas': [50],
            'num_new_rule_sets': [3],
            'num_final_solutions': [3],
            'num_solutions_combinational': [20],
            'num_thoughts_per_step': [5],
            'search_depth': [3]
        }
        
        # Create sample CSV file
        df = pd.DataFrame(self.sample_data)
        df.to_csv(self.results_csv_path, index=False)
    
    def teardown_method(self):
        """Clean up test fixtures."""
        shutil.rmtree(self.temp_dir)
    
    @patch('src.main.Path')
    @patch('src.main.run_evaluation')
    @patch('src.main.load_optimal_solutions')
    @patch('src.main.load_calibration_anchors')
    @patch('src.main.load_known_solutions_concept')
    @patch('src.main.load_known_solutions')
    @patch('src.main.load_feasibility_check_points')
    @patch('src.main.load_task_description')
    @patch('src.main.shutil.copyfile')
    def test_end_to_end_evaluation_only_mode(self, mock_copyfile, mock_load_task, mock_load_feasibility, 
                                           mock_load_known, mock_load_known_concept, mock_load_calibration,
                                           mock_load_optimal, mock_run_evaluation, mock_path):
        """Test complete end-to-end evaluation-only mode workflow."""
        # Mock Path to return our test directory
        mock_path.return_value.parent.parent = Path(self.temp_dir)
        
        # Mock all the loading functions with realistic data
        mock_load_task.return_value = "Design a traffic control system for a single-lane bridge"
        mock_load_feasibility.return_value = [
            "System must prevent simultaneous opposing traffic",
            "Must use existing infrastructure",
            "Must be implementable with current technology"
        ]
        mock_load_known.return_value = [
            "Fixed time intervals",
            "Manual traffic control",
            "Simple alternating system"
        ]
        mock_load_known_concept.return_value = [
            "Time-based control",
            "Human-operated system",
            "Basic alternating mechanism"
        ]
        mock_load_calibration.return_value = [
            "Score 5: Competent solution with clear improvement over baseline",
            "Score 8: Significant improvement with advanced concepts"
        ]
        mock_load_optimal.return_value = (
            ["AI-powered dynamic scheduling with predictive analytics"],
            ["Intelligent adaptive control system"]
        )
        
        # Mock evaluation results
        mock_eval_result = EvaluationResult(
            original_solution_id='sol_1_new',
            individual_solution_text='Test solution for integration test',
            feasibility_score=0.9,
            utility_score=0.8,
            novelty_score=0.7,
            creativity_score=0.8,
            intermediate_log_filename='intermediate_logs/eval_new123_sol_1_new.json',
            feasibility_reasoning='New feasibility reasoning from integration test',
            utility_reasoning='New utility reasoning from integration test',
            novelty_theme='New novelty theme from integration test'
        )
        mock_run_evaluation.return_value = [mock_eval_result]
        
        # Run the evaluation-only mode
        run_evaluation_only_mode()
        
        # Verify backup was created
        mock_copyfile.assert_called_once()
        backup_call_args = mock_copyfile.call_args
        assert backup_call_args[0][0] == self.results_csv_path
        assert "results_before_evaluation_" in str(backup_call_args[0][1])
        assert backup_call_args[0][1].endswith(".csv")
        
        # Verify all loading functions were called
        mock_load_task.assert_called_once_with("bridge")
        mock_load_feasibility.assert_called_once_with("bridge")
        mock_load_known.assert_called_once_with("bridge")
        mock_load_known_concept.assert_called_once_with("bridge")
        mock_load_calibration.assert_called_once_with("bridge")
        mock_load_optimal.assert_called_once_with("bridge")
        
        # Verify run_evaluation was called with correct parameters
        mock_run_evaluation.assert_called_once()
        eval_call_args = mock_run_evaluation.call_args
        assert eval_call_args[1]['solution_text'] == 'Test solution for integration test'
        assert eval_call_args[1]['num_final_solutions'] == 3
        
        # Verify the CSV was updated
        updated_df = pd.read_csv(self.results_csv_path)
        assert len(updated_df) == 1
        
        # Check that new evaluation data is present
        assert updated_df.iloc[0]['feasibility_score'] == 0.9
        assert updated_df.iloc[0]['utility_score'] == 0.8
        assert updated_df.iloc[0]['novelty_score'] == 0.7
        assert updated_df.iloc[0]['creativity_score'] == 0.8
        assert updated_df.iloc[0]['feasibility_reasoning'] == 'New feasibility reasoning from integration test'
        assert updated_df.iloc[0]['utility_reasoning'] == 'New utility reasoning from integration test'
        assert updated_df.iloc[0]['novelty_theme'] == 'New novelty theme from integration test'
        assert updated_df.iloc[0]['intermediate_log_filename'] == 'intermediate_logs/eval_new123_sol_1_new.json'
        
        # Check that datetime was updated
        assert updated_df.iloc[0]['datetime'] != '2025-01-01T10:00:00'
        
        # Check that run_id was extracted and used
        assert '12345678-1234-1234-1234-123456789abc' in str(eval_call_args[1]['run_id'])
    
    @patch('src.main.Path')
    def test_integration_missing_csv_file(self, mock_path):
        """Test integration behavior when results.csv doesn't exist."""
        # Mock Path to return our test directory
        mock_path.return_value.parent.parent = Path(self.temp_dir)
        
        # Remove the CSV file
        self.results_csv_path.unlink()
        
        with pytest.raises(SystemExit):
            run_evaluation_only_mode()
    
    @patch('src.main.Path')
    @patch('src.main.run_evaluation')
    @patch('src.main.load_optimal_solutions')
    @patch('src.main.load_calibration_anchors')
    @patch('src.main.load_known_solutions_concept')
    @patch('src.main.load_known_solutions')
    @patch('src.main.load_feasibility_check_points')
    @patch('src.main.load_task_description')
    @patch('src.main.shutil.copyfile')
    def test_integration_multiple_solutions_per_row(self, mock_copyfile, mock_load_task, mock_load_feasibility, 
                                                   mock_load_known, mock_load_known_concept, mock_load_calibration,
                                                   mock_load_optimal, mock_run_evaluation, mock_path):
        """Test integration when run_evaluation returns multiple solutions for one row."""
        # Mock Path to return our test directory
        mock_path.return_value.parent.parent = Path(self.temp_dir)
        
        # Mock all the loading functions
        mock_load_task.return_value = "Test task"
        mock_load_feasibility.return_value = ["Check 1"]
        mock_load_known.return_value = ["Solution 1"]
        mock_load_known_concept.return_value = ["Concept 1"]
        mock_load_calibration.return_value = ["Anchor 1"]
        mock_load_optimal.return_value = (["Optimal 1"], ["Optimal concept 1"])
        
        # Mock evaluation to return multiple results
        mock_eval_results = [
            EvaluationResult(
                original_solution_id='sol_1_new',
                individual_solution_text='Test solution 1',
                feasibility_score=0.9,
                utility_score=0.8,
                novelty_score=0.7,
                creativity_score=0.8,
                intermediate_log_filename='intermediate_logs/eval_new123_sol_1_new.json',
                feasibility_reasoning='Reasoning 1',
                utility_reasoning='Reasoning 1',
                novelty_theme='Theme 1'
            ),
            EvaluationResult(
                original_solution_id='sol_2_new',
                individual_solution_text='Test solution 2',
                feasibility_score=0.85,
                utility_score=0.75,
                novelty_score=0.65,
                creativity_score=0.75,
                intermediate_log_filename='intermediate_logs/eval_new123_sol_2_new.json',
                feasibility_reasoning='Reasoning 2',
                utility_reasoning='Reasoning 2',
                novelty_theme='Theme 2'
            )
        ]
        mock_run_evaluation.return_value = mock_eval_results
        
        # Run the evaluation-only mode
        run_evaluation_only_mode()
        
        # Verify the CSV was updated with multiple rows
        updated_df = pd.read_csv(self.results_csv_path)
        assert len(updated_df) == 2  # One original row became two rows
        
        # Check that both solutions are present
        assert any(updated_df['feasibility_score'] == 0.9)
        assert any(updated_df['feasibility_score'] == 0.85)
        assert any(updated_df['original_solution_id'] == 'sol_1_new')
        assert any(updated_df['original_solution_id'] == 'sol_2_new')
