# Copyright 2026
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio

from verl.interactions.ver_k_retry_interaction import VerKRetryInteraction


def _make_simple_maze():
    # 2x2 grid with a single open edge from (0,0) -> (0,1) (Right).
    # connection_list[1][r][c] indicates an open edge to the right from (r, c).
    return {
        "grid_n": 2,
        "connection_list": [
            # vertical (down) connections
            [
                [0, 0],
                [0, 0],
            ],
            # horizontal (right) connections
            [
                [1, 0],
                [0, 0],
            ],
        ],
        "start_pos": [0, 0],
        "end_pos": [0, 1],
    }


def test_ver_k_retry_maze_correct_path_early_stop():
    """A correct path on the first attempt should terminate the interaction.

    The interaction is configured with a callable verifier and two allowed
    attempts. The assistant submits the fixture's solution inside
    ``<answer>`` tags on attempt one, and the test expects termination,
    a reward of 1.0, ``is_correct`` set, ``attempt_idx`` of 1 and the
    "Correct. Stop." feedback message.
    """
    config = {
        "max_attempts": 2,
        "verifier_type": "callable",
        "verifier_fn_path": "examples.reward_fns.maze_path_reward:verifier_fn",
        "answer_extraction": "answer_tag",
        "include_history": False,
    }
    interaction = VerKRetryInteraction(config)

    maze = _make_simple_maze()
    # The fixture's goal (0, 1) is exactly one cell to the right of the
    # start (0, 0) in a 2-column grid, so the only in-bounds solution is a
    # single "R". A second "R" would step off the grid.
    # NOTE(review): the verifier itself is not visible from this file; "R"
    # is valid whether it simulates the path or compares to ground truth.
    solution = "R"

    async def _run():
        await interaction.start_interaction(
            instance_id="maze_ok",
            query="Solve the maze.",
            ground_truth=solution,
            max_attempts=2,
            maze=maze,
        )
        answer = f"<answer>{solution}</answer>"
        messages = [{"role": "assistant", "content": answer}]
        return await interaction.generate_response("maze_ok", messages)

    should_terminate, response, reward, metadata = asyncio.run(_run())

    assert should_terminate is True
    assert reward == 1.0
    assert metadata.get("is_correct") is True
    assert metadata.get("attempt_idx") == 1
    assert response == "Correct. Stop."


def test_ver_k_retry_maze_incorrect_path_retry():
    """A wrong path on the first attempt should leave the interaction open.

    The interaction is configured with a callable verifier and two allowed
    attempts. The assistant submits "D" inside ``<answer>`` tags, and the
    test expects no termination, a reward of 0.0, ``is_correct`` cleared,
    ``attempt_idx`` of 1 and feedback text containing "incorrect".
    """
    config = {
        "max_attempts": 2,
        "verifier_type": "callable",
        "verifier_fn_path": "examples.reward_fns.maze_path_reward:verifier_fn",
        "answer_extraction": "answer_tag",
        "include_history": False,
    }
    interaction = VerKRetryInteraction(config)

    maze = _make_simple_maze()

    async def _run():
        await interaction.start_interaction(
            instance_id="maze_bad",
            query="Solve the maze.",
            # The fixture's goal is one step right of the start, so the
            # reference solution is a single "R" (a second "R" would leave
            # the 2-column grid).
            ground_truth="R",
            max_attempts=2,
            maze=maze,
        )
        # The fixture has no open downward edge at the start cell, so "D"
        # neither follows an open passage nor matches the reference path.
        messages = [{"role": "assistant", "content": "<answer>D</answer>"}]
        return await interaction.generate_response("maze_bad", messages)

    should_terminate, response, reward, metadata = asyncio.run(_run())

    assert should_terminate is False
    assert reward == 0.0
    assert metadata.get("is_correct") is False
    assert metadata.get("attempt_idx") == 1
    assert "incorrect" in response.lower()
