"""Unit tests for combinational creative reasoning algorithm."""

import pytest
import json
from unittest.mock import patch, MagicMock
from src.algorithms.combinational_creative_reasoning.main import reasoning_model
from src.data_models.task_config import TaskConfig


class TestCombinationalCreativeReasoning:
    """Unit tests for the combinational creative reasoning model class.

    Every test constructs ``reasoning_model`` positionally with six arguments:
    task config, backbone LLM name, and four integer counts. Judging by the
    assertions and comments in this file, the counts are
    ``num_analogous_problems``, ``num_solutions_per_problem``, the number of
    final solutions, and ``num_solutions_combinational`` (in that order).

    The LLM is never contacted: each test replaces
    ``model.llm_client.call_llm_model`` with a ``MagicMock`` that returns
    canned text.

    NOTE(review): there is no validation test for the fifth count (the number
    of final solutions), and ``test_init_success`` does not assert it; the
    attribute name is not visible from this file -- confirm and add coverage.
    """

    @staticmethod
    def _assert_single_llm_call(model):
        """Check the mocked LLM call and return its keyword arguments.

        Asserts that ``model.llm_client.call_llm_model`` (expected to be a
        ``MagicMock``) was called exactly once, with ``model_name="gpt-4"``
        and ``temperature=0.7`` passed as keyword arguments.

        Returns:
            The keyword-argument dict of that single call, so callers can
            make further assertions (for example on ``prompt``).
        """
        llm_mock = model.llm_client.call_llm_model
        llm_mock.assert_called_once()
        # call_args unpacks as (positional args, keyword args).
        _, kwargs = llm_mock.call_args
        assert kwargs['model_name'] == "gpt-4"
        assert kwargs['temperature'] == 0.7
        return kwargs

    def setup_method(self):
        """Build the shared ``TaskConfig`` fixture before each test."""
        self.task_config = TaskConfig(
            feasibility_check_points=["Check 1", "Check 2"],
            task_description="Design a bridge crossing system",
            known_solutions=["Solution 1: Build a wider bridge", "Solution 2: Add traffic lights"]
        )

    def test_init_success(self):
        """Valid arguments are stored on the model and an LLM client is created."""
        model = reasoning_model(self.task_config, "gpt-4", 10, 5, 3, 10)

        assert model.task_config == self.task_config
        assert model.backbone_llm_name == "gpt-4"
        assert model.num_analogous_problems == 10
        assert model.num_solutions_per_problem == 5
        assert model.num_solutions_combinational == 10
        assert model.llm_client is not None

    def test_init_invalid_task_config(self):
        """A task config that is not a ``TaskConfig`` raises ``ValueError``."""
        # Every case passes the full six-argument signature so that a missing
        # positional argument can never be what makes construction fail.
        for bad_config in ("invalid", None):
            with pytest.raises(ValueError, match="task_config must be a TaskConfig object"):
                reasoning_model(bad_config, "gpt-4", 10, 5, 3, 10)

    def test_init_invalid_backbone_llm_name(self):
        """An empty, ``None`` or whitespace-only LLM name raises ``ValueError``."""
        for bad_name in ("", None, "   "):
            with pytest.raises(ValueError, match="backbone_llm_name must be a non-empty string"):
                reasoning_model(self.task_config, bad_name, 10, 5, 3, 10)

    def test_init_invalid_num_analogous_problems(self):
        """Zero, negative or non-integer ``num_analogous_problems`` raises ``ValueError``."""
        for bad_count in (0, -1, "invalid"):
            with pytest.raises(ValueError, match="num_analogous_problems must be a positive integer"):
                reasoning_model(self.task_config, "gpt-4", bad_count, 5, 3, 10)

    def test_init_invalid_num_solutions_per_problem(self):
        """Zero, negative or non-integer ``num_solutions_per_problem`` raises ``ValueError``."""
        for bad_count in (0, -1, "invalid"):
            with pytest.raises(ValueError, match="num_solutions_per_problem must be a positive integer"):
                reasoning_model(self.task_config, "gpt-4", 10, bad_count, 3, 10)

    def test_init_invalid_num_solutions_combinational(self):
        """Zero, negative or non-integer ``num_solutions_combinational`` raises ``ValueError``."""
        for bad_count in (0, -1, "invalid"):
            with pytest.raises(ValueError, match="num_solutions_combinational must be a positive integer"):
                reasoning_model(self.task_config, "gpt-4", 10, 5, 3, bad_count)

    def test_find_analogous_problems_success(self):
        """A JSON list from the LLM is returned as the analogous problems."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)

        # Canned LLM reply: a JSON array with three problem names.
        model.llm_client.call_llm_model = MagicMock(
            return_value='["Traffic flow optimization", "Resource allocation", "Queue management"]'
        )

        result = model._find_analogous_problems()

        assert len(result) == 3
        for expected_problem in (
            "Traffic flow optimization",
            "Resource allocation",
            "Queue management",
        ):
            assert expected_problem in result

        # The prompt must mention the requested number of problems.
        kwargs = self._assert_single_llm_call(model)
        assert "3 problems" in kwargs['prompt']

    def test_find_analogous_problems_fallback(self):
        """An unparseable LLM reply yields placeholder analogous problems."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)

        # Canned LLM reply that is not valid JSON.
        model.llm_client.call_llm_model = MagicMock(return_value="Invalid response")

        result = model._find_analogous_problems()

        assert len(result) == 3
        assert all("Analogous problem" in problem for problem in result)

    def test_find_solutions_for_problems_success(self):
        """A JSON object keyed by index is mapped back onto the problems."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)
        analogous_problems = ["Problem 1", "Problem 2"]

        # Canned LLM reply: solutions keyed by "1" and "2".
        model.llm_client.call_llm_model = MagicMock(
            return_value='{"1": ["Solution 1.1", "Solution 1.2"], "2": ["Solution 2.1", "Solution 2.2"]}'
        )

        result = model._find_solutions_for_problems(analogous_problems)

        assert len(result) == 2
        for problem in analogous_problems:
            assert problem in result
        for problem in analogous_problems:
            assert len(result[problem]) == 2

        self._assert_single_llm_call(model)

    def test_find_solutions_for_problems_fallback(self):
        """An unparseable LLM reply still yields two solutions per problem."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)
        analogous_problems = ["Problem 1", "Problem 2"]

        # Canned LLM reply that is not valid JSON.
        model.llm_client.call_llm_model = MagicMock(return_value="Invalid response")

        result = model._find_solutions_for_problems(analogous_problems)

        assert len(result) == 2
        for problem in analogous_problems:
            assert problem in result
        for problem in analogous_problems:
            assert len(result[problem]) == 2

    def test_decompose_solutions_into_ideas_success(self):
        """A JSON list from the LLM is returned as the extracted ideas."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)
        solutions_per_problem = {
            "Problem 1": ["Solution 1.1", "Solution 1.2"],
            "Problem 2": ["Solution 2.1", "Solution 2.2"]
        }

        # Canned LLM reply: a JSON array with three ideas.
        model.llm_client.call_llm_model = MagicMock(
            return_value='["Idea 1: Efficiency", "Idea 2: Optimization", "Idea 3: Automation"]'
        )

        result = model._decompose_solutions_into_ideas(solutions_per_problem)

        assert len(result) == 3
        for expected_idea in (
            "Idea 1: Efficiency",
            "Idea 2: Optimization",
            "Idea 3: Automation",
        ):
            assert expected_idea in result

        self._assert_single_llm_call(model)

    def test_decompose_solutions_into_ideas_fallback(self):
        """An unparseable LLM reply yields placeholder core ideas."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)
        solutions_per_problem = {
            "Problem 1": ["Solution 1.1", "Solution 1.2"]
        }

        # Canned LLM reply that is not valid JSON.
        model.llm_client.call_llm_model = MagicMock(return_value="Invalid response")

        result = model._decompose_solutions_into_ideas(solutions_per_problem)

        assert len(result) == 10  # Fallback count expected by this test
        assert all("Core idea" in idea for idea in result)

    def test_identify_impactful_ideas_success(self):
        """Index-keyed LLM output is mapped onto the task's known solutions."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)

        # Canned LLM reply: two ideas for "1", one idea for "2".
        model.llm_client.call_llm_model = MagicMock(
            return_value='{"1": ["Impactful idea 1", "Impactful idea 2"], "2": ["Impactful idea 3"]}'
        )

        result = model._identify_impactful_ideas()

        assert len(result) == 2
        assert "Solution 1: Build a wider bridge" in result
        assert "Solution 2: Add traffic lights" in result
        assert len(result["Solution 1: Build a wider bridge"]) == 2
        assert len(result["Solution 2: Add traffic lights"]) == 1

        self._assert_single_llm_call(model)

    def test_generate_new_solutions_success(self):
        """The requested solution count comes from the constructor, not the task text."""
        # The task description deliberately mentions a different number (3)
        # than the num_solutions_combinational passed to the constructor (5).
        task_config = TaskConfig(
            feasibility_check_points=["Check 1", "Check 2"],
            task_description="Generate 3 solutions for this problem",
            known_solutions=["Solution 1", "Solution 2"]
        )
        model = reasoning_model(task_config, "gpt-4", 3, 2, 3, 5)  # Use 5 as num_solutions_combinational
        impactful_ideas = {
            "Solution 1": ["Idea 1", "Idea 2"]
        }
        all_ideas = ["Idea A", "Idea B", "Idea C"]

        # Canned LLM reply: a JSON array with five new solutions.
        model.llm_client.call_llm_model = MagicMock(
            return_value='["New solution 1", "New solution 2", "New solution 3", "New solution 4", "New solution 5"]'
        )

        result = model._generate_new_solutions(impactful_ideas, all_ideas)

        assert len(result) == 5  # Should use num_solutions_combinational (5), not extract from task (3)
        for expected_solution in (
            "New solution 1",
            "New solution 2",
            "New solution 3",
            "New solution 4",
            "New solution 5",
        ):
            assert expected_solution in result

        # The prompt must ask for 5 solutions, not the 3 from the task description.
        kwargs = self._assert_single_llm_call(model)
        assert "Generate 5 new creative solutions" in kwargs['prompt']

    def test_evaluate_and_rank_solutions_success(self):
        """Scored solutions come back sorted by score, highest first."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)
        new_solutions = ["Solution 1", "Solution 2", "Solution 3"]

        # Canned LLM reply: one integer score per index.
        model.llm_client.call_llm_model = MagicMock(return_value='{"1": 85, "2": 72, "3": 91}')

        result = model._evaluate_and_rank_solutions(new_solutions)

        assert len(result) == 3
        # Each entry's second element is its score; order must be descending.
        assert result[0][1] == 91.0  # Highest score first
        assert result[1][1] == 85.0
        assert result[2][1] == 72.0  # Lowest score last

        self._assert_single_llm_call(model)

    def test_format_final_solutions(self):
        """Each ranked solution appears with its rank header and score."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 3, 10)
        ranked_solutions = [
            ("Best solution", 95.0),
            ("Good solution", 80.0),
            ("Okay solution", 65.0)
        ]

        result = model._format_final_solutions(ranked_solutions)

        assert "Solution 1 (Score: 95.0):" in result
        assert "Best solution" in result
        assert "Solution 2 (Score: 80.0):" in result
        assert "Good solution" in result
        assert "Solution 3 (Score: 65.0):" in result
        assert "Okay solution" in result

    def test_format_final_solutions_with_num_final_solutions(self):
        """Only the top ``num_final_solutions`` entries are formatted."""
        model = reasoning_model(self.task_config, "gpt-4", 3, 2, 2, 10)  # num_final_solutions = 2
        ranked_solutions = [
            ("Best solution", 95.0),
            ("Good solution", 80.0),
            ("Okay solution", 65.0),
            ("Poor solution", 50.0)
        ]

        result = model._format_final_solutions(ranked_solutions)

        # Should only include top 2 solutions
        assert "Solution 1 (Score: 95.0):" in result
        assert "Best solution" in result
        assert "Solution 2 (Score: 80.0):" in result
        assert "Good solution" in result
        # Should not include the other solutions
        assert "Solution 3" not in result
        assert "Okay solution" not in result
        assert "Poor solution" not in result

    def test_run_success(self):
        """The full pipeline makes six LLM calls and returns text plus logs."""
        model = reasoning_model(self.task_config, "gpt-4", 2, 2, 3, 10)

        # One canned reply per pipeline step, consumed in call order.
        llm_replies = [
            '["Problem 1", "Problem 2"]',  # Find analogous problems
            '{"1": ["Sol 1.1", "Sol 1.2"], "2": ["Sol 2.1", "Sol 2.2"]}',  # Find solutions
            '["Idea 1", "Idea 2", "Idea 3"]',  # Decompose into ideas
            '{"1": ["Impact 1"], "2": ["Impact 2"]}',  # Identify impactful ideas
            '["New 1", "New 2", "New 3"]',  # Generate new solutions
            '{"1": 85, "2": 72, "3": 91}'  # Evaluate solutions
        ]
        model.llm_client.call_llm_model = MagicMock(side_effect=llm_replies)

        solution_text, intermediate_logs = model.run()

        # Verify result contains formatted solutions
        for rank in (1, 2, 3):
            assert f"Solution {rank} (Score:" in solution_text

        # Verify intermediate logs were collected
        assert len(intermediate_logs) == 6
        # Verify each log entry has the correct format: (step_name, [log_dict])
        for log_entry in intermediate_logs:
            assert isinstance(log_entry, tuple)
            assert len(log_entry) == 2
            step_name, step_logs = log_entry
            assert isinstance(step_name, str)  # step name
            assert isinstance(step_logs, list)  # list of log dictionaries
            assert len(step_logs) == 1  # one log dictionary per step
            assert isinstance(step_logs[0], dict)  # log dictionary

        # Verify all 6 LLM calls were made
        assert model.llm_client.call_llm_model.call_count == 6

    def test_run_exception_handling(self):
        """An LLM failure during ``run`` surfaces as a ``RuntimeError``."""
        model = reasoning_model(self.task_config, "gpt-4", 2, 2, 3, 10)

        # Every LLM call raises, so the first pipeline step already fails.
        model.llm_client.call_llm_model = MagicMock(side_effect=Exception("API error"))

        with pytest.raises(RuntimeError, match="Error during solution generation: API error"):
            model.run()

    def test_llm_client_initialization(self):
        """The model's ``llm_client`` is an ``LLMAPIClient`` instance."""
        model = reasoning_model(self.task_config, "gpt-4", 10, 5, 3, 10)

        # Imported here (as in the original test) rather than at module level.
        from src.utils.llm_api_client import LLMAPIClient
        assert isinstance(model.llm_client, LLMAPIClient)
