from rllm.environments.games.tower_of_hanoi import TowerOfHanoiEnv


def test_tower_of_hanoi_basic():
    """Print the opening observation of a 3-disk, 3-peg puzzle and three moves.

    Nothing is asserted; the function is a smoke run whose output is meant to
    be read by a person.
    """
    banner = "=" * 60
    print(banner)
    print("Test 1: Basic Tower of Hanoi (3 disks, 3 pegs)")
    print(banner)

    env = TowerOfHanoiEnv(
        task={
            "num_disks": 3,
            "num_pegs": 3,
            "start_peg": 1,
            "target_peg": 3,
            "auxiliary_pegs": [2],
            "max_turns": 30,
        }
    )
    opening, _ = env.reset()

    print("\nInitial Observation:")
    print(f"State ASCII:\n{opening['state_ascii']}")
    print(f"State String:\n{opening['state_string']}")
    print(f"Disk Names: {opening['disk_names']}")
    print(f"Start Peg: {opening['start_peg']}, Target Peg: {opening['target_peg']}")
    print(f"Auxiliary Pegs: {opening['auxiliary_pegs']}")
    print(f"Progress: {opening['progress']}")

    def play(step_no, command):
        # Send one command to the env, print what comes back, and hand the
        # observation to the caller for any extra reporting.
        print("\n" + "-" * 60)
        print(f"Action {step_no}: {command}")
        result, reward, done, _ = env.step(command)
        print(f"State String:\n{result['state_string']}")
        print(f"Reward: {reward}")
        print(f"Done: {done}")
        print(f"Progress: {result['progress']}")
        return result

    # Step 1: disk A, peg 1 -> peg 3. Only this step surfaces the env message.
    after_first = play(1, "ACTION:\n```\nmove('A', 1, 3)\n```")
    first_message = after_first.get('env_message')
    if first_message:
        print(f"Env Message: {first_message}")

    # Step 2: disk B, peg 1 -> peg 2.
    play(2, "ACTION:\n```\nmove('B', 1, 2)\n```")

    # Step 3: disk A, peg 3 -> peg 2.
    play(3, "ACTION:\n```\nmove('A', 3, 2)\n```")


def test_tower_of_hanoi_4pegs():
    """Print the opening state of a 3-disk, 4-peg puzzle and the result of one move.

    Output only; no assertions are made about the environment's response.
    """
    heavy_rule = "=" * 60
    print("\n" + heavy_rule)
    print("Test 2: Tower of Hanoi with 4 pegs")
    print(heavy_rule)

    puzzle = dict(
        num_disks=3,
        num_pegs=4,
        start_peg=2,
        target_peg=4,
        auxiliary_pegs=[1, 3],
        max_turns=30,
    )
    env = TowerOfHanoiEnv(task=puzzle)
    start_view, _ = env.reset()

    print("\nInitial Observation:")
    print(f"State String:\n{start_view['state_string']}")
    print(f"Num Pegs: {start_view['num_pegs']}")
    print(f"Start Peg: {start_view['start_peg']}, Target Peg: {start_view['target_peg']}")
    print(f"Auxiliary Pegs: {start_view['auxiliary_pegs']}")

    # Single probe move: disk A from the start peg (2) to the target peg (4).
    command = "ACTION:\n```\nmove('A', 2, 4)\n```"
    print("\n" + "-" * 60)
    print(f"Action: {command}")
    next_view, reward, _, _ = env.step(command)
    print(f"State String:\n{next_view['state_string']}")
    print(f"Reward: {reward}")
    print(f"Progress: {next_view['progress']}")


def test_invalid_action():
    """Sandwich a move the script expects to be rejected between two ordinary ones.

    For each step the resulting state and reward are printed; the middle step
    additionally prints the environment's message (or 'None' when absent).
    Nothing is asserted.
    """
    heavy_rule = "=" * 60
    print("\n" + heavy_rule)
    print("Test 3: Invalid Action Handling")
    print(heavy_rule)

    env = TowerOfHanoiEnv(
        task={
            "num_disks": 3,
            "num_pegs": 3,
            "start_peg": 1,
            "target_peg": 3,
            "max_turns": 30,
        }
    )
    opening, _ = env.reset()

    print("\nInitial State:")
    print(f"State String:\n{opening['state_string']}")

    def submit(headline, command):
        # Step the env once and print the parts common to all three steps.
        print("\n" + "-" * 60)
        print(f"{headline}: {command}")
        outcome, reward, _, _ = env.step(command)
        print(f"State String:\n{outcome['state_string']}")
        print(f"Reward: {reward}")
        return outcome

    # Disk A leaves peg 1 for peg 2 first.
    moved_a = submit(
        "Valid Action (move A first)",
        "ACTION:\n```\nmove('A', 1, 2)\n```",
    )
    print(f"Progress: {moved_a['progress']}")

    # Peg 2 now holds A, so sending C there would put C on top of A; the
    # script labels this move invalid because C is the larger disk.
    rejected = submit(
        "Invalid Action (trying to move C on top of A, but C is larger)",
        "ACTION:\n```\nmove('C', 1, 2)\n```",
    )
    print(f"Env Message: {rejected.get('env_message', 'None')}")

    # Follow up with an ordinary move of B to the still-empty peg 3.
    moved_b = submit(
        "Valid Action",
        "ACTION:\n```\nmove('B', 1, 3)\n```",
    )
    print(f"Progress: {moved_b['progress']}")


def test_action_parsing():
    """Feed a disk-A move to the environment in several quoting styles.

    The environment is reset before every attempt. All three commands name
    peg 1 as the source of disk A, so once the first one has been applied
    the later ones would be judged against a board where A has already left
    peg 1; any rejection would then reflect the stale state rather than the
    parser. Resetting is presumed to restore the starting position -- confirm
    against TowerOfHanoiEnv.reset().

    Exceptions raised by ``step`` are caught and printed so that one failing
    format does not hide the results of the others.
    """
    print("\n" + "=" * 60)
    print("Test 4: Action Parsing Formats")
    print("=" * 60)

    task = {
        "num_disks": 2,
        "num_pegs": 3,
        "start_peg": 1,
        "target_peg": 3,
        "max_turns": 30,
    }

    env = TowerOfHanoiEnv(task=task)

    # Same kind of move each time; only the quoting around the disk name varies.
    action_formats = [
        "ACTION:\n```\nmove('A', 1, 3)\n```",  # single quotes
        "ACTION:\n```\nmove(\"A\", 1, 2)\n```",  # double quotes
        "ACTION:\n```\nmove(A, 1, 3)\n```",  # no quotes
    ]

    for i, action in enumerate(action_formats, 1):
        print(f"\nAction Format {i}: {action}")
        # Fresh board per format so every command is tried from the start.
        env.reset()
        try:
            obs, reward, _, _ = env.step(action)
            print("✓ Parsed successfully")
            print(f"Reward: {reward}")
            print(f"State: {obs['state_string']}")
            # Surface any message from the env so a rejected command is
            # visible even though step() returned normally.
            message = obs.get('env_message')
            if message:
                print(f"Env Message: {message}")
        except Exception as e:
            print(f"✗ Error: {e}")


def test_complete_solution():
    """Play a three-move script on the 2-disk puzzle and print every step.

    The loop stops early as soon as the environment reports ``done``; at that
    point the termination reason from the observation (or 'None') is printed.
    No assertion checks that the puzzle was actually solved.
    """
    heavy_rule = "=" * 60
    print("\n" + heavy_rule)
    print("Test 5: Complete Solution (2 disks)")
    print(heavy_rule)

    env = TowerOfHanoiEnv(
        task={
            "num_disks": 2,
            "num_pegs": 3,
            "start_peg": 1,
            "target_peg": 3,
            "max_turns": 30,
        }
    )
    opening, _ = env.reset()

    print("\nInitial State:")
    print(f"State String:\n{opening['state_string']}")

    # Intended route from peg 1 to peg 3: park A on peg 2, send B to peg 3,
    # then bring A across to join it.
    script = (
        "ACTION:\n```\nmove('A', 1, 2)\n```",
        "ACTION:\n```\nmove('B', 1, 3)\n```",
        "ACTION:\n```\nmove('A', 2, 3)\n```",
    )

    for step_no, command in enumerate(script, start=1):
        print(f"\n--- Step {step_no} ---")
        print(f"Action: {command}")
        view, reward, finished, _ = env.step(command)
        print(f"State String:\n{view['state_string']}")
        print(f"Reward: {reward}")
        print(f"Progress: {view['progress']}")
        print(f"Done: {finished}")
        if not finished:
            continue
        print(f"Termination Reason: {view.get('termination_reason', 'None')}")
        break


if __name__ == "__main__":
    # Run each demo in its numbered order, then print a closing banner.
    for demo in (
        test_tower_of_hanoi_basic,
        test_tower_of_hanoi_4pegs,
        test_invalid_action,
        test_action_parsing,
        test_complete_solution,
    ):
        demo()

    closing_rule = "=" * 60
    print("\n" + closing_rule)
    print("All tests completed!")
    print(closing_rule)

