from rllm.environments.games.sudoku_env import SudokuEnv

def test_sudoku_env():
    """Run a single step of ``SudokuEnv`` on a 4x4 task and print the outcome.

    This is a manual smoke run rather than an assertion-based test: it
    builds the environment from an inline task description, resets it,
    submits one model-style reply containing a single ``candidate`` action,
    and prints the four values returned by ``step``.
    """
    # NOTE(review): the solution string repeats "1234" four times, so on a
    # 4x4 grid the same digit recurs along one axis -- confirm whether the
    # environment validates the solution or only compares against it.
    puzzle_task = {
        "rows": 4,
        "cols": 4,
        "initial_board": "................",
        "solution": "1234123412341234",
        "max_turns": 30,
        "progress_reward_type": "progress",
    }
    sudoku = SudokuEnv(task=puzzle_task)
    sudoku.reset()

    # Alternative reply with several candidate edits in one action block
    # (currently disabled):
    # agent_reply = "<think>\nI need to solve the puzzle.\n</think>REASON:\nI will solve the puzzle step by step.\nACTION:\n```\ncandidate('-', 2, r1c1)\ncandidate('+', 3, r1c1)\ncandidate('-', 3, r1c1)\ncandidate('+', 4, r1c1)\ncandidate('-', 6, r1c1)\ncandidate('-', 9, r2c2)\n```"

    # The reply is assembled from its three sections (think / reason /
    # action); adjacent literals concatenate into one string at compile time.
    agent_reply = (
        "<think>\nI need to solve the puzzle.\n</think>"
        "REASON:\nI will solve the puzzle step by step.\n"
        "ACTION:\n```\ncandidate('-', 2, r1c1)\n```"
    )

    # Unpack explicitly so a change in the number of returned values fails
    # loudly here instead of silently printing a different shape.
    observation, step_reward, finished, details = sudoku.step(agent_reply)
    for value in (observation, step_reward, finished, details):
        print(value)

# Allow running this file directly as a script to execute the smoke run
# without going through a test runner.
if __name__ == "__main__":
    test_sudoku_env()