"""
This module defines data structures and base classes for reward calculations
to evaluate model responses for various problem types, including math and coding.
"""

from dataclasses import dataclass
from enum import Enum

@dataclass
class RewardConfig:
    """Weights and constant reward values made available to reward functions.

    Every field has a default, so ``RewardConfig()`` is a valid configuration.
    This class only stores the values; how they are combined is up to the
    reward function that receives the config.
    """

    # --- Math-based rewards ---
    math_reward_weight: float = 1.0
    # NOTE(review): presumably toggles an outcome reward model (ORM) for math
    # scoring -- confirm against the math reward implementation.
    use_math_orm: bool = False
    # NOTE(review): looks like it controls how "think" sections of a response
    # are treated -- confirm against the math reward implementation.
    ignore_think: bool = False

    # --- Code-based rewards ---
    code_reward_weight: float = 1.0

    # --- CoT-based rewards (weight is 0.0 by default) ---
    cot_reward_weight: float = 0.0

    # --- General reward constants ---
    correct_reward: float = 1.0
    incorrect_reward: float = 0.0
    format_error_reward: float = 0.0
    unk_error_reward: float = 0.0

    # --- Bonus reward for calling tools ---
    toolcall_bonus: float = 0.5


class RewardType(Enum):
    """Problem categories for which a reward can be assigned.

    Each member's value is a string identical to the member's name.

    Members:
        MATH: A math-related problem.
        CODE: A coding-related problem.
        UNK: An unknown or unclassified problem.
    """

    MATH = "MATH"
    CODE = "CODE"
    UNK = "UNK"

@dataclass(slots=True, kw_only=True)
class RewardInput:
    """Everything a reward function needs to score a single model response.

    All fields are keyword-only. ``problem_type`` defaults to
    ``RewardType.UNK``; every other field must be supplied.

    Attributes:
        problem: The original problem text or prompt given to the model.
        problem_type: The category of the problem (e.g., math, code).
        data_source: Where the sample comes from (e.g., a dataset name).
        model_response: The model-generated response to be evaluated.
        metadata: Extra context required for evaluation:
            - For math problems: this may include the ground truth answer.
            - For coding problems: this may include unit tests used to
              validate the solution.
    """

    problem: str
    problem_type: RewardType = RewardType.UNK
    data_source: str
    model_response: str

    # Illustrative ``metadata`` layouts for code datasets, carried over from
    # the original notes in this file.
    # NOTE(review): the original examples had unbalanced brackets, so the
    # nesting shown below is a best-effort reading -- confirm against the
    # dataset loaders before relying on it.
    #
    #   codeforces:
    #     metadata['test_cases'] = [
    #         {"input": "3 6 9", "output": "6"},
    #         {"input": "4 4 4", "output": "4"},
    #         {"input": "0 0 0", "output": "0"},
    #     ]
    #
    #   codetest:
    #     metadata['public_tests'] = {
    #         "input": ["3\n((()))\n(())()\n()(()"],
    #         "output": ["YES\nYES\nNO"],
    #     }
    #
    #   apps / TACO:
    #     metadata['input_output'] = {
    #         "inputs": ["8\n5 2\nWLWLL\n6 5\nLLLWWL\n7 1\nLWLWLWL\n15 5\nWWWLLLWWWLLLWWW\n40 7\nLLWLWLWWWLWLLWLWWWLWLLWLLWLLLLWLLWWWLWWL\n1 0\nL\n1 1\nL\n6 1\nWLLWLW\n"],
    #         "outputs": ["7\n11\n6\n26\n46\n0\n1\n6\n"],
    #     }
    metadata: dict

@dataclass(slots=True, kw_only=True)
class LiveCodebenchInput:
    """Keyword-only input record for reward calculation on code problems.

    NOTE(review): the class name suggests these samples come from the
    LiveCodeBench benchmark -- confirm against the caller.

    Attributes:
        problem_type: Problem category; defaults to ``RewardType.CODE``.
        question: Question text. Presumably the prompt shown to the model --
            TODO confirm.
        generation_code: Presumably the code generated by the model -- TODO
            confirm.
        problem: Dict describing the problem. Its schema is not defined in
            this module.
        difficult: Difficulty label; defaults to ``'easy'``.
    """

    problem_type: RewardType = RewardType.CODE
    question: str
    generation_code: str
    problem: dict
    difficult: str = 'easy'

@dataclass(slots=True, kw_only=True)
class RewardOutput:
    """Data structure for the output of reward calculations.

    Attributes:
        reward (float): The computed reward value based on the evaluation of the model's response.
        is_correct (bool): A boolean flag indicating whether the model's response is deemed correct.
    """
    reward: float
    is_correct: bool


class RewardFn:
    """Base class for reward calculation strategies.

    Concrete strategies subclass this and override ``__call__`` to evaluate a
    ``RewardInput`` and produce a ``RewardOutput``. The ``__call__`` defined
    here only raises ``NotImplementedError``.

    Attributes:
        config: The ``RewardConfig`` passed to the constructor.
    """

    def __init__(self, config: RewardConfig):
        """Keep ``config`` on the instance so subclasses can read it."""
        self.config = config

    def __call__(self, input: RewardInput) -> RewardOutput:
        """Evaluate ``input`` and return the resulting reward.

        Raises:
            NotImplementedError: Always, in this base class; subclasses must
                provide the real implementation.
        """
        message = "Subclasses must implement this method."
        raise NotImplementedError(message)

