from rllm.environments.games.sudoku_env import SudokuEnv

def test_sudoku_env():
    """Run one step of a 4x4 ``SudokuEnv`` and print what comes back.

    Builds the environment from a task whose initial board is sixteen ``.``
    characters, resets it, submits a single response containing four
    ``value(...)`` lines inside an ACTION block, and prints the four values
    returned by ``env.step``. Nothing is asserted, so this only fails if
    constructing, resetting, or stepping the environment raises.
    """
    task_config = {
        "rows": 4,
        "cols": 4,
        "initial_board": "................",
        "solution": "1234123412341234",
        "max_turns": 30,
        "progress_reward_type": "progress",
    }
    env = SudokuEnv(task=task_config)
    env.reset()

    # Adjacent literals are concatenated at compile time into one string.
    # NOTE(review): presumably each value(d, rXcY) line fills one cell of the
    # first row -- confirm against SudokuEnv's action parser.
    agent_reply = (
        "<think>\nI need to solve the puzzle.\n</think>"
        "REASON:\nI will solve the puzzle step by step.\n"
        "ACTION:\n```\n"
        "value(1, r1c1)\n"
        "value(2, r1c2)\n"
        "value(3, r1c3)\n"
        "value(4, r1c4)\n"
        "```"
    )

    # Unpack into exactly four names so an unexpected return shape still
    # raises here rather than being printed silently.
    observation, step_reward, finished, details = env.step(agent_reply)
    for item in (observation, step_reward, finished, details):
        print(item)

# Allow running this file directly as a script to execute the smoke test;
# importing the module does not trigger it.
if __name__ == "__main__":
    test_sudoku_env()