from abc import ABC, abstractmethod
from typing import Tuple, Union

import numpy as np

# Abstract base class for Markov Reward Process (MRP) environments.


class MRP(ABC):
    '''
    Abstract interface for a Markov Reward Process.

    Concrete subclasses must implement reset, step, get_value,
    get_steady_d and get_feature_index. This base class only stores the
    constructor arguments and offers mc(), a truncated Monte Carlo
    rollout built on top of step().
    '''

    def __init__(self, n_states: int, gamma: float) -> None:
        '''
        param n_states: kept on the instance as self.n_states
        param gamma: kept on the instance as self.gamma; mc() multiplies
                     its running discount by this value after each step
        '''
        super().__init__()
        self.gamma = gamma
        self.n_states = n_states

    @abstractmethod
    def reset(self) -> Union[int, np.ndarray]:
        '''
        return: a state (int or array); presumably the initial state of
                a new rollout -- the exact meaning is up to the subclass
        '''

    @abstractmethod
    def step(self, state: Union[int, np.ndarray]) -> Tuple[Union[int, np.ndarray], float]:
        '''
        param state: state to transition from
        return: (next state, reward) pair; mc() feeds the first element
                back in as the next input and accumulates the second
        '''

    @abstractmethod
    def get_value(self) -> np.ndarray:
        '''
        return: an array; presumably the value function of the process
                (inferred from the name -- defined by the subclass)
        '''

    @abstractmethod
    def get_steady_d(self) -> np.ndarray:
        '''
        return: an array; presumably the steady-state distribution
                (inferred from the name -- defined by the subclass)
        '''

    @abstractmethod
    def get_feature_index(self, state: Union[int, np.ndarray]) -> int:
        '''
        param state: state to look up
        return: an integer index associated with the state; how it is
                derived is defined by the subclass
        '''

    def mc(self, s: Union[int, np.ndarray], steps: int) -> float:
        '''
        Roll the process forward from s and sum the discounted rewards.

        param s: state the rollout starts from
        param steps: number of times step() is called
        return: accumulated discounted reward; 0.0 if no step is taken
        '''
        ret, weight = 0.0, 1.0
        state = s
        for _ in range(steps):
            state, reward = self.step(state)
            ret += weight * reward
            # The first reward is undiscounted; each later one is scaled
            # by one more factor of gamma.
            weight *= self.gamma
        return ret
