from abc import abstractmethod

import numpy as np
import pdb
from solver.policy.finite_policy import FiniteFeedbackPolicy, normalize


class ArrayPolicy(FiniteFeedbackPolicy):
    """
    Finite action space feedback policy stored as a dense array.

    The table ``policy_array`` has shape
    ``(time_steps, state_space[-1].n, action_space.n)`` and is filled with
    ``1 / action_space.n`` at construction, so every action starts out
    equally likely for every time step and observation index.
    """

    def __init__(self,
                 time_steps,
                 state_space,
                 action_space):
        """
        Build the policy table with a uniform distribution over actions.

        :param time_steps: size of the first (time) axis of the table
        :param state_space: indexable collection whose last element exposes
            ``n``; that value sizes the observation axis (presumably a
            sequence of discrete spaces -- confirm against callers)
        :param action_space: object exposing ``n``, the number of actions
        """
        super().__init__(state_space, action_space)

        # Only the last entry of state_space determines the observation axis.
        num_observations = state_space[-1].n
        num_actions = action_space.n
        table_shape = (time_steps, num_observations, num_actions)

        self.policy_array = np.full(shape=table_shape,
                                    fill_value=1 / num_actions)

    def pmf(self, t, x):
        """
        Look up the action pmf stored for time t and observation x.

        For integer ``t`` and ``x`` the result is the length
        ``action_space.n`` slice of ``policy_array`` (a NumPy view into the
        table, not a copy).

        :param t: time, used as index on the first axis of the table
        :param x: observation, used as index on the second axis
        :return: action pmf
        """
        action_pmf = self.policy_array[t, x]
        return action_pmf
