from rllm.environments.games.rush_hour import RushHourEnv


def test_rush_hour_basic():
    """Smoke-run one step of the Rush Hour environment and print the outcome.

    Creates a 6x6 puzzle, resets the environment, prints the initial
    observation, then submits a single ``move`` command for the target car
    and prints the resulting board, reward, done flag and progress.
    Nothing is asserted; the output is meant to be read by eye.
    """
    rule = "=" * 60
    print(rule)
    print("Test 1: Basic Rush Hour")
    print(rule)

    puzzle = dict(
        board_config="oxCCDDooxoMoIoAAMoIoKLFFJoKLooJGGHHH",
        rows=6,
        cols=6,
        target_car="A",
        max_turns=30,
    )

    env = RushHourEnv(task=puzzle)
    obs, info = env.reset()

    print("\nInitial Observation:")
    initial_fields = (
        ("Board:\n", "board_ascii"),
        ("Target Car: ", "target_car"),
        ("Progress: ", "progress"),
    )
    for label, key in initial_fields:
        print(f"{label}{obs[key]}")

    # Ask for A to slide two cells to the right.
    # NOTE(review): reading board_config row-major (6 chars per row), M sits
    # directly to the right of A, so the environment may reject this move --
    # confirm against RushHourEnv's layout rules.
    action1 = "ACTION:\n```\nmove('A', 'right', 2)\n```"
    print("\n" + "-" * 60)
    print(f"Action 1: {action1}")

    obs, reward, done, info = env.step(action1)
    print(f"Board:\n{obs['board_ascii']}")
    print(f"Reward: {reward}")
    print(f"Done: {done}")
    print(f"Progress: {obs['progress']}")

    # Only mention the environment message when one was actually supplied.
    message = obs.get('env_message')
    if message:
        print(f"Env Message: {message}")


def test_rush_hour_action_formats():
    """Feed several spellings of the ``move`` command to one environment.

    Every action string is passed to ``env.step`` on the same environment
    instance, so board state carries over from one attempt to the next.
    For each attempt the reward and board are printed; if ``env.step``
    raises, the exception text is printed instead and the loop continues.
    Nothing is asserted; the output is meant to be read by eye.
    """
    print("\n" + "=" * 60)
    print("Test 2: Action Format Parsing")
    print("=" * 60)

    task = {
        "board_config": "oxCCDDooxoMoIoAAMoIoKLFFJoKLooJGGHHH",
        "rows": 6,
        "cols": 6,
        "target_car": "A",
        "max_turns": 30,
    }

    env = RushHourEnv(task=task)
    obs, info = env.reset()

    # Spellings to try.  Note that 'B', 'aa' and 'AA' do not occur in
    # board_config, so those entries probe how unknown vehicle ids are
    # handled as well as how the text is parsed.
    action_formats = [
        "ACTION:\n```\nmove('A', 'right')\n```",  # Single quotes, no distance given
        "ACTION:\n```\nmove('A', 'right', 2)\n```",  # Single quotes, explicit distance
        "ACTION:\n```\nmove(\"B\", \"left\", 3)\n```",  # Double quotes, explicit distance
        "ACTION:\n```\nmove(C, down, 1)\n```",  # No quotes
        "ACTION:\n```\nmove(aa, right, 4)\n```",  # No quotes, lowercase two-char id
        "ACTION:\n```\nmove(AA, right, 2)\n```",  # No quotes, uppercase two-char id
    ]

    for i, action in enumerate(action_formats, 1):
        print(f"\nAction Format {i}: {action}")
        # Deliberately broad: the point of this demo is to report any failure
        # for a given spelling and keep going with the remaining ones.
        try:
            obs, reward, done, info = env.step(action)
            print("✓ Parsed successfully")
            print(f"Reward: {reward}")
            print(f"Board:\n{obs['board_ascii']}")
            # Surface the environment's own feedback, as the other tests in
            # this file do; without it a rejected-but-not-raised action is
            # indistinguishable from an accepted one.
            message = obs.get('env_message')
            if message:
                print(f"Env Message: {message}")
        except Exception as e:
            print(f"✗ Error: {e}")


def test_invalid_action():
    """Submit one move that is expected to be refused and print the response.

    Resets a fresh environment, shows the starting board, sends a single
    ``move`` for vehicle G and prints the board, reward and environment
    message that come back.  Nothing is asserted.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Test 3: Invalid Action Handling")
    print(divider)

    puzzle = dict(
        board_config="oxCCDDooxoMoIoAAMoIoKLFFJoKLooJGGHHH",
        rows=6,
        cols=6,
        target_car="A",
        max_turns=30,
    )

    env = RushHourEnv(task=puzzle)
    obs, info = env.reset()

    print("\nInitial State:")
    print(f"Board:\n{obs['board_ascii']}")

    # NOTE(review): reading board_config row-major (6 chars per row), G lies
    # horizontally on the last row with H immediately to its right, so this
    # slide should collide with another vehicle -- confirm against
    # RushHourEnv's layout and collision rules.
    blocked_move = "ACTION:\n```\nmove('G', 'right')\n```"
    print("\n" + "-" * 60)
    print(f"Action (may be invalid): {blocked_move}")

    obs, reward, done, info = env.step(blocked_move)
    print(f"Board:\n{obs['board_ascii']}")
    print(f"Reward: {reward}")
    # Falls back to the literal text 'None' when no message key is present.
    print(f"Env Message: {obs.get('env_message', 'None')}")


def test_vehicle_movement():
    """Play a fixed ten-move sequence and print the state after each step.

    Resets a fresh environment, prints the starting observation, then sends
    the moves one at a time.  After every step the board, reward, progress
    and done flag are printed, plus the environment message when present.
    The loop stops at the first step that reports ``done``; if the
    termination reason is ``PUZZLE_COMPLETE`` a success line is printed.
    Nothing is asserted.

    NOTE(review): traced by hand against board_config (assuming a row-major
    layout and that vehicles slide only through 'o' cells), every move in
    the sequence is unobstructed and the last one leaves A in the two
    right-most cells of its row.  Whether that yields PUZZLE_COMPLETE
    depends on RushHourEnv's exit rule -- confirm.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Test 4: Vehicle Movement (Successful Solution)")
    print(divider)

    puzzle = dict(
        board_config="oxCCDDooxoMoIoAAMoIoKLFFJoKLooJGGHHH",
        rows=6,
        cols=6,
        target_car="A",
        max_turns=30,
    )

    env = RushHourEnv(task=puzzle)
    obs, info = env.reset()

    print("\nInitial State:")
    for label, key in (
        ("Board:\n", "board_ascii"),
        ("Target Car: ", "target_car"),
        ("Progress: ", "progress"),
    ):
        print(f"{label}{obs[key]}")

    # Bare move calls, in play order.  The early moves shuffle the blocking
    # vehicles aside; the final one sends the target car A to the right.
    move_calls = (
        "move('I', 'up')",
        "move('J', 'up', 1)",
        "move('G', 'left', 1)",
        "move('K', 'down', 1)",
        "move('A', 'left', 1)",
        "move('L', 'up', 2)",
        "move('F', 'left', 3)",
        "move('L', 'down', 2)",
        "move('M', 'down', 2)",
        "move('A', 'right', 3)",
    )
    # Wrap each call in the ACTION / fenced-block envelope used by the other
    # tests in this file.
    solution = [f"ACTION:\n```\n{call}\n```" for call in move_calls]

    for step_no, command in enumerate(solution, 1):
        print(f"\n--- Step {step_no} ---")
        print(f"Action: {command}")

        obs, reward, done, info = env.step(command)
        print(f"Board:\n{obs['board_ascii']}")
        print(f"Reward: {reward}")
        print(f"Progress: {obs['progress']}")
        print(f"Done: {done}")

        message = obs.get('env_message')
        if message:
            print(f"Env Message: {message}")

        if not done:
            continue

        # A missing key yields None, which formats as the text 'None'.
        reason = obs.get('termination_reason')
        print(f"Termination Reason: {reason}")
        if reason == 'PUZZLE_COMPLETE':
            print("✓ Puzzle solved successfully!")
        break


if __name__ == "__main__":
    # Run every demo in file order when executed as a script, then print a
    # closing banner.
    demos = (
        test_rush_hour_basic,
        test_rush_hour_action_formats,
        test_invalid_action,
        test_vehicle_movement,
    )
    for demo in demos:
        demo()

    footer_rule = "=" * 60
    print("\n" + footer_rule)
    print("All tests completed!")
    print(footer_rule)

