"""Base policy interface for the FRAME system.

All policies should inherit from the Policy class and implement the required methods.
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Union, Tuple
import numpy as np

from frame.knowledge_base.knowledge_graph import KnowledgeGraph
from frame.environments.math_env import ValidAction, MathEnv


class Policy(ABC):
    """Common interface for policies that steer the mathematical discovery process.

    Concrete policies must override ``select_action``. The remaining hooks
    (``set_rules`` and ``update``) are no-ops here and may be overridden
    when a policy needs them.
    """

    def __init__(
        self,
        requires_enumeration: bool = True,
        seed: Optional[int] = None,
        **kwargs,
    ):
        """
        Set up the state shared by every policy.

        Args:
            requires_enumeration: True when the policy needs all valid actions
                to be enumerated for it; ``can_generate_actions`` reports the
                negation of this flag
            seed: Seed for the policy's random number generator
                (None leaves the generator unseeded)
            **kwargs: Extra parameters; accepted but not used by the base class
        """
        # Each policy owns its generator so runs can be reproduced from a seed.
        self.rng = np.random.RandomState(seed)
        self.requires_enumeration = requires_enumeration

    def set_rng(self, rng: np.random.RandomState) -> None:
        """Replace the policy's random number generator with the given one."""
        self.rng = rng

    def set_rules(self, rules: List) -> None:
        """
        Tell the policy which production rules are available.

        The base class ignores the rules; subclasses that need to know about
        the available production rules should override this method.

        Args:
            rules: List of production rules
        """

    @abstractmethod
    def select_action(self, env: MathEnv) -> Optional[ValidAction]:
        """
        Choose the next action for the current state of the environment.

        Args:
            env: The math environment holding the current state

        Returns:
            The chosen ValidAction, or None when no valid action is available
        """

    def update(
        self,
        env: MathEnv,
        action: Union[int, ValidAction],
        reward: float,
        done: bool,
    ) -> None:
        """
        Incorporate an observed transition into the policy.

        The base class does nothing here; policies that learn from feedback
        override this hook.

        Args:
            env: The math environment after the action was taken
            action: The action that was taken
            reward: The reward that was received
            done: Whether the episode is done
        """

    def can_generate_actions(self) -> bool:
        """
        Report whether the policy can produce actions without enumeration.

        Returns:
            False if the policy requires enumerated valid actions,
            True if it can generate actions directly from the graph
        """
        if self.requires_enumeration:
            return False
        return True