import numpy as np
import pdb
from solver.policy.base import FeedbackPolicy


class DiscretizedGraphonFeedbackPolicy(FeedbackPolicy):
    """
    Finite action space graphon feedback policy.

    Thin wrapper around another policy object: action distributions are
    obtained by delegating to the wrapped policy's ``pmf`` method, and
    actions are drawn by sampling an index from that distribution.
    """

    def __init__(self, state_space, action_space, policy):
        """
        Store the wrapped policy after initialising the base class.
        :param state_space: forwarded unchanged to the base class
        :param action_space: forwarded unchanged to the base class
        :param policy: wrapped policy; must expose ``pmf(t, x)``
        """
        super().__init__(state_space, action_space)
        self.policy = policy
        # NOTE(review): single fixed value 0.5; not read anywhere in this
        # class -- presumably consumed by callers, confirm its meaning there.
        self.alphas = np.array([0.5])

    @property
    def policy_array(self):
        """
        Expose the wrapped policy's ``policy_array`` attribute
        :return: the wrapped attribute, or None if the wrapped policy has none
        """
        wrapped = self.policy
        if not hasattr(wrapped, 'policy_array'):
            return None
        return wrapped.policy_array

    def act(self, t, x):
        """
        Draw a random action index at time t for observation x
        :param t: time
        :param x: observation
        :return: sampled index into the action pmf, as a Python scalar
        """
        probabilities = self.pmf(t, x)
        candidates = range(len(probabilities))
        # Draw a length-1 sample and unwrap it to a plain Python scalar.
        drawn = np.random.choice(candidates, size=1, p=probabilities)
        return drawn.item()

    def pmf(self, t, x):
        """
        Compute the action pmf at time t for observation x
        :param t: time
        :param x: observation
        :return: action pmf as returned by the wrapped policy
        """
        distribution = self.policy.pmf(t, x)
        return distribution
