"""Unit tests for main.py evaluation-only mode functionality."""

import pytest
import pandas as pd
import tempfile
import shutil
from pathlib import Path
from unittest.mock import patch, MagicMock, mock_open
from datetime import datetime

from src.main import run_evaluation_only_mode
from src.data_models.evaluation_result import EvaluationResult


class TestEvaluationOnlyMode:
    """Test cases for evaluation-only mode functionality.

    Every test works against a throw-away directory tree
    (``<tmp>/results/results.csv``) created in ``setup_method`` and removed in
    ``teardown_method``.  ``src.main.Path`` is patched so that
    ``Path(...).parent.parent`` evaluates to that temporary directory.
    """

    def setup_method(self):
        """Set up test fixtures.

        Creates the temporary results directory, writes a two-row
        ``results.csv`` into it and prepares two ``EvaluationResult`` objects
        that the patched ``run_evaluation`` hands back.
        """
        self.temp_dir = tempfile.mkdtemp()
        self.results_dir = Path(self.temp_dir) / "results"
        self.results_dir.mkdir()
        self.results_csv_path = self.results_dir / "results.csv"

        # Sample CSV data
        self.sample_data = {
            'datetime': ['2025-01-01T10:00:00', '2025-01-01T10:01:00'],
            'algorithm_name': ['test_algorithm', 'test_algorithm'],
            'task_name': ['bridge', 'bridge'],
            'solution': ['Test solution 1', 'Test solution 2'],
            'feasibility_score': [0.8, 0.9],
            'utility_score': [0.7, 0.8],
            'novelty_score': [0.6, 0.7],
            'creativity_score': [0.7, 0.8],
            'original_solution_id': ['sol_1', 'sol_2'],
            'intermediate_log_filename': ['intermediate_logs/eval_123_sol_1.json', 'intermediate_logs/eval_456_sol_2.json'],
            'backbone_llm_name': ['gpt-4o', 'gpt-4o'],
            'feasibility_reasoning': ['Old reasoning 1', 'Old reasoning 2'],
            'utility_reasoning': ['Old reasoning 1', 'Old reasoning 2'],
            'novelty_theme': ['Old theme 1', 'Old theme 2'],
            'num_analogous_problems': [10, 10],
            'num_solutions_per_problem': [5, 5],
            'num_exploratory_ideas': [50, 50],
            'num_new_rule_sets': [3, 3],
            'num_final_solutions': [3, 3],
            'num_solutions_combinational': [20, 20],
            'num_thoughts_per_step': [5, 5],
            'search_depth': [3, 3]
        }

        # Create sample CSV file
        df = pd.DataFrame(self.sample_data)
        df.to_csv(self.results_csv_path, index=False)

        # Mock evaluation results
        self.mock_evaluation_results = [
            EvaluationResult(
                original_solution_id='sol_1_new',
                individual_solution_text='Test solution 1',
                feasibility_score=0.9,
                utility_score=0.8,
                novelty_score=0.7,
                creativity_score=0.8,
                intermediate_log_filename='intermediate_logs/eval_new123_sol_1_new.json',
                feasibility_reasoning='New feasibility reasoning',
                utility_reasoning='New utility reasoning',
                novelty_theme='New novelty theme'
            ),
            EvaluationResult(
                original_solution_id='sol_2_new',
                individual_solution_text='Test solution 2',
                feasibility_score=0.95,
                utility_score=0.85,
                novelty_score=0.75,
                creativity_score=0.85,
                intermediate_log_filename='intermediate_logs/eval_new456_sol_2_new.json',
                feasibility_reasoning='New feasibility reasoning 2',
                utility_reasoning='New utility reasoning 2',
                novelty_theme='New novelty theme 2'
            )
        ]

    def teardown_method(self):
        """Clean up test fixtures."""
        # A cleanup failure must not hide the outcome of the test itself.
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Shared helpers (not collected by pytest: names do not start with "test")
    # ------------------------------------------------------------------

    def _redirect_project_root(self, mock_path):
        """Make ``Path(...).parent.parent`` inside ``src.main`` yield the temp dir."""
        # NOTE(review): presumably src.main derives its project root via
        # Path(__file__).parent.parent -- confirm against src/main.py.
        mock_path.return_value.parent.parent = Path(self.temp_dir)

    @staticmethod
    def _configure_loader_mocks(mock_load_task, mock_load_feasibility, mock_load_known,
                                mock_load_known_concept, mock_load_calibration,
                                mock_load_optimal):
        """Give each patched ``src.main`` loader its canned return value.

        Tests that need a loader to fail can set ``side_effect`` on the
        relevant mock afterwards; an exception ``side_effect`` takes
        precedence over ``return_value``.
        """
        mock_load_task.return_value = "Test task description"
        mock_load_feasibility.return_value = ["Check 1", "Check 2"]
        mock_load_known.return_value = ["Known solution 1", "Known solution 2"]
        mock_load_known_concept.return_value = ["Concept 1", "Concept 2"]
        mock_load_calibration.return_value = ["Anchor 1", "Anchor 2"]
        mock_load_optimal.return_value = (["Optimal 1"], ["Optimal concept 1"])

    @staticmethod
    def _extract_run_id(filename):
        """Return the run id embedded in an ``eval_<run_id>_...`` filename.

        Only the last ``/``-separated component is inspected.  Returns
        ``None`` when the filename is empty, does not follow the ``eval_``
        naming scheme, or carries an empty run id.
        """
        if not filename or 'eval_' not in filename:
            return None
        basename = filename.split('/')[-1]
        if not basename.startswith('eval_'):
            return None
        parts = basename.split('_')
        # "eval_<run_id>_..." -> parts[1] is the run id; guard against "eval_".
        if len(parts) > 1 and parts[1]:
            return parts[1]
        return None

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    @patch('src.main.Path')
    @patch('src.main.run_evaluation')
    @patch('src.main.load_optimal_solutions')
    @patch('src.main.load_calibration_anchors')
    @patch('src.main.load_known_solutions_concept')
    @patch('src.main.load_known_solutions')
    @patch('src.main.load_feasibility_check_points')
    @patch('src.main.load_task_description')
    @patch('src.main.shutil.copyfile')
    def test_run_evaluation_only_mode_success(self, mock_copyfile, mock_load_task, mock_load_feasibility, 
                                            mock_load_known, mock_load_known_concept, mock_load_calibration,
                                            mock_load_optimal, mock_run_evaluation, mock_path):
        """Test successful execution of evaluation-only mode."""
        self._redirect_project_root(mock_path)
        self._configure_loader_mocks(mock_load_task, mock_load_feasibility, mock_load_known,
                                     mock_load_known_concept, mock_load_calibration,
                                     mock_load_optimal)

        # Mock run_evaluation to return our mock results
        mock_run_evaluation.return_value = self.mock_evaluation_results

        # Run the function
        run_evaluation_only_mode()

        # Verify backup was created
        mock_copyfile.assert_called_once()

        # Verify all loading functions were called
        assert mock_load_task.call_count == 2  # Called for each row
        assert mock_load_feasibility.call_count == 2
        assert mock_load_known.call_count == 2
        assert mock_load_known_concept.call_count == 2
        assert mock_load_calibration.call_count == 2
        assert mock_load_optimal.call_count == 2

        # Verify run_evaluation was called for each row
        assert mock_run_evaluation.call_count == 2

        # Verify the CSV was updated
        updated_df = pd.read_csv(self.results_csv_path)
        assert len(updated_df) == 4  # 2 original rows * 2 evaluation results each

        # Check that new evaluation data is present.  Scores went through a
        # CSV round-trip, so compare with a tolerance instead of exact ==.
        feasibility_scores = updated_df['feasibility_score'].tolist()
        assert any(score == pytest.approx(0.9) for score in feasibility_scores)
        assert any(score == pytest.approx(0.95) for score in feasibility_scores)
        assert (updated_df['feasibility_reasoning'] == 'New feasibility reasoning').any()

    @patch('src.main.Path')
    def test_run_evaluation_only_mode_no_csv_file(self, mock_path):
        """Test handling when results.csv doesn't exist."""
        self._redirect_project_root(mock_path)

        # Remove the CSV file
        self.results_csv_path.unlink()

        with pytest.raises(SystemExit):
            run_evaluation_only_mode()

    @patch('src.main.Path')
    @patch('src.main.pd.read_csv')
    def test_run_evaluation_only_mode_empty_csv(self, mock_read_csv, mock_path):
        """Test handling when results.csv is empty."""
        self._redirect_project_root(mock_path)

        # Mock empty DataFrame
        mock_read_csv.return_value = pd.DataFrame()

        # Should not raise an error, just return early
        run_evaluation_only_mode()

        # Verify CSV was read
        mock_read_csv.assert_called_once()

    @patch('src.main.Path')
    @patch('src.main.run_evaluation')
    @patch('src.main.load_optimal_solutions')
    @patch('src.main.load_calibration_anchors')
    @patch('src.main.load_known_solutions_concept')
    @patch('src.main.load_known_solutions')
    @patch('src.main.load_feasibility_check_points')
    @patch('src.main.load_task_description')
    @patch('src.main.shutil.copyfile')
    def test_run_evaluation_only_mode_evaluation_error(self, mock_copyfile, mock_load_task, 
                                                      mock_load_feasibility, mock_load_known, 
                                                      mock_load_known_concept, mock_load_calibration,
                                                      mock_load_optimal, mock_run_evaluation, mock_path):
        """Test handling when evaluation fails for a row."""
        self._redirect_project_root(mock_path)
        self._configure_loader_mocks(mock_load_task, mock_load_feasibility, mock_load_known,
                                     mock_load_known_concept, mock_load_calibration,
                                     mock_load_optimal)

        # Mock run_evaluation to fail for first row, succeed for second
        mock_run_evaluation.side_effect = [Exception("Evaluation failed"), self.mock_evaluation_results[1:]]

        # Run the function
        run_evaluation_only_mode()

        # Verify the CSV was still updated (with original data for failed row)
        updated_df = pd.read_csv(self.results_csv_path)
        # NOTE(review): the mocked data suggests 2 rows (1 preserved original
        # for the failed row + 1 new row from the single successful result);
        # the expected count of 3 is kept as-is -- confirm against the actual
        # behavior of src.main.run_evaluation_only_mode.
        assert len(updated_df) == 3  # 1 original + 1 failed + 1 successful evaluation

        # Check that original data is preserved for failed row (tolerant
        # comparison because the score was re-read from CSV).
        preserved_scores = [
            score for score in updated_df['feasibility_score'].tolist()
            if score == pytest.approx(0.8)
        ]
        assert len(preserved_scores) == 1

    @patch('src.main.Path')
    @patch('src.main.run_evaluation')
    @patch('src.main.load_optimal_solutions')
    @patch('src.main.load_calibration_anchors')
    @patch('src.main.load_known_solutions_concept')
    @patch('src.main.load_known_solutions')
    @patch('src.main.load_feasibility_check_points')
    @patch('src.main.load_task_description')
    @patch('src.main.shutil.copyfile')
    def test_run_evaluation_only_mode_missing_config_files(self, mock_copyfile, mock_load_task, 
                                                          mock_load_feasibility, mock_load_known, 
                                                          mock_load_known_concept, mock_load_calibration,
                                                          mock_load_optimal, mock_run_evaluation, mock_path):
        """Test handling when some config files are missing."""
        self._redirect_project_root(mock_path)
        self._configure_loader_mocks(mock_load_task, mock_load_feasibility, mock_load_known,
                                     mock_load_known_concept, mock_load_calibration,
                                     mock_load_optimal)

        # Two of the loaders behave as if their config file were missing.
        mock_load_known_concept.side_effect = FileNotFoundError("File not found")
        mock_load_optimal.side_effect = FileNotFoundError("File not found")

        # Mock run_evaluation
        mock_run_evaluation.return_value = self.mock_evaluation_results

        # Run the function
        run_evaluation_only_mode()

        # Verify it handled missing files gracefully: no exception escaped
        # above and both failing loaders were still attempted for each row.
        assert mock_load_known_concept.call_count == 2
        assert mock_load_optimal.call_count == 2

    def test_run_id_extraction_from_filename(self):
        """Test extraction of run_id from intermediate_log_filename.

        This exercises a local mirror of the parsing rule (``_extract_run_id``)
        rather than ``src.main`` itself.  A ``None`` expectation marks inputs
        with no extractable run id, for which a new UUID should be generated.
        """
        test_cases = [
            ("intermediate_logs/eval_12345678-1234-1234-1234-123456789abc_sol_1.json", "12345678-1234-1234-1234-123456789abc"),
            ("eval_98765432-4321-4321-4321-987654321def_sol_2.json", "98765432-4321-4321-4321-987654321def"),
            ("some_other_file.json", None),  # Should generate new UUID
            ("", None),  # Should generate new UUID
        ]

        for filename, expected_run_id in test_cases:
            # Every case is asserted, including those expecting no run id.
            extracted_run_id = self._extract_run_id(filename)
            assert extracted_run_id == expected_run_id, (
                f"run id extracted from {filename!r} was {extracted_run_id!r}, "
                f"expected {expected_run_id!r}"
            )

    @patch('src.main.Path')
    @patch('src.main.run_evaluation')
    @patch('src.main.load_optimal_solutions')
    @patch('src.main.load_calibration_anchors')
    @patch('src.main.load_known_solutions_concept')
    @patch('src.main.load_known_solutions')
    @patch('src.main.load_feasibility_check_points')
    @patch('src.main.load_task_description')
    @patch('src.main.shutil.copyfile')
    def test_backup_filename_format(self, mock_copyfile, mock_load_task, mock_load_feasibility, 
                                   mock_load_known, mock_load_known_concept, mock_load_calibration,
                                   mock_load_optimal, mock_run_evaluation, mock_path):
        """Test that backup filename uses correct timestamp format."""
        self._redirect_project_root(mock_path)
        self._configure_loader_mocks(mock_load_task, mock_load_feasibility, mock_load_known,
                                     mock_load_known_concept, mock_load_calibration,
                                     mock_load_optimal)

        # Mock run_evaluation
        mock_run_evaluation.return_value = self.mock_evaluation_results

        # Mock datetime to return a fixed timestamp
        with patch('src.main.datetime') as mock_datetime:
            mock_datetime.now.return_value.strftime.return_value = "2025-01-15_14-30-45"
            mock_datetime.now.return_value.isoformat.return_value = "2025-01-15T14:30:45.123456"

            # Run the function
            run_evaluation_only_mode()

            # Verify backup filename format
            expected_backup_path = self.results_dir / "results_before_evaluation_2025-01-15_14-30-45.csv"
            mock_copyfile.assert_called_once_with(self.results_csv_path, expected_backup_path)

    @patch('src.main.Path')
    @patch('src.main.run_evaluation')
    @patch('src.main.load_optimal_solutions')
    @patch('src.main.load_calibration_anchors')
    @patch('src.main.load_known_solutions_concept')
    @patch('src.main.load_known_solutions')
    @patch('src.main.load_feasibility_check_points')
    @patch('src.main.load_task_description')
    @patch('src.main.shutil.copyfile')
    def test_csv_update_with_new_evaluation_data(self, mock_copyfile, mock_load_task, mock_load_feasibility, 
                                                mock_load_known, mock_load_known_concept, mock_load_calibration,
                                                mock_load_optimal, mock_run_evaluation, mock_path):
        """Test that CSV is properly updated with new evaluation data."""
        self._redirect_project_root(mock_path)
        self._configure_loader_mocks(mock_load_task, mock_load_feasibility, mock_load_known,
                                     mock_load_known_concept, mock_load_calibration,
                                     mock_load_optimal)

        # Mock run_evaluation to return single result per row
        mock_run_evaluation.side_effect = [
            [self.mock_evaluation_results[0]],  # First row
            [self.mock_evaluation_results[1]]   # Second row
        ]

        # Run the function
        run_evaluation_only_mode()

        # Verify the CSV was updated
        updated_df = pd.read_csv(self.results_csv_path)
        assert len(updated_df) == 2  # Same number of rows

        # Check that new evaluation data is present.  Numeric columns are
        # compared with a tolerance because they were re-parsed from CSV text.
        first_row = updated_df.iloc[0]
        assert first_row['feasibility_score'] == pytest.approx(0.9)
        assert first_row['utility_score'] == pytest.approx(0.8)
        assert first_row['novelty_score'] == pytest.approx(0.7)
        assert first_row['creativity_score'] == pytest.approx(0.8)
        assert first_row['feasibility_reasoning'] == 'New feasibility reasoning'
        assert first_row['utility_reasoning'] == 'New utility reasoning'
        assert first_row['novelty_theme'] == 'New novelty theme'
        assert first_row['intermediate_log_filename'] == 'intermediate_logs/eval_new123_sol_1_new.json'

        # Check that datetime was updated
        assert first_row['datetime'] != '2025-01-01T10:00:00'
