import pandas as pd
import numpy as np


class EnvironmentRedPillBluePill:
    """Two-state environment with states 'redworld' and 'blueworld'.

    The action chooses the next state ('red_pill' -> 'redworld',
    'blue_pill' -> 'blueworld'); the reward is sampled according to the
    state the step is taken *from*:

    * 'redworld': a single normal distribution (``dist_1``).
    * 'blueworld': a mixture of two normal distributions, ``dist_2a``
      picked with probability ``dist_2_prob`` and ``dist_2b`` otherwise.

    Rewards are clipped so that they are never positive.
    """

    def __init__(self, dist_2_prob=0.5):
        """Set up the reward distributions and draw a random start state.

        Args:
            dist_2_prob: Probability of sampling the 'blueworld' reward
                from ``dist_2a``; ``dist_2b`` is used with probability
                ``1 - dist_2_prob``.
        """
        # Parameters of the normal distributions rewards are drawn from.
        self.dist_1 = {'mean': -0.7, 'stdev': 0.05}
        self.dist_2a = {'mean': -1, 'stdev': 0.05}
        self.dist_2b = {'mean': -0.2, 'stdev': 0.05}
        self.dist_2_prob = dist_2_prob

        # Default start state, picked uniformly once per instance.
        self.start_state = np.random.choice(['redworld', 'blueworld'])

    def env_start(self, start_state=None):
        """Return the initial state.

        Args:
            start_state: Optional state to start from. When it is missing
                (``None`` or NaN, as judged by ``pd.isnull``), the start
                state drawn in ``__init__`` is returned instead.

        Returns:
            ``start_state`` if provided, otherwise ``self.start_state``.
        """
        if pd.isnull(start_state):
            return self.start_state
        return start_state

    def env_step(self, state, action, terminal=False):
        """Take one step: sample a reward for ``state`` and apply ``action``.

        Args:
            state: Current state, 'redworld' or 'blueworld'. It selects
                the reward distribution.
            action: 'red_pill' or 'blue_pill'. It selects the next state.
            terminal: Flag returned unchanged as the third tuple element.

        Returns:
            Tuple ``(reward, next_state, terminal)`` where ``reward`` is
            the sampled value clipped to be at most 0.

        Raises:
            ValueError: If ``action`` or ``state`` is not one of the
                recognised values.
        """
        # Validate both inputs before sampling so that a bad call fails
        # with a clear message and does not consume random numbers.
        if action == 'red_pill':
            next_state = 'redworld'
        elif action == 'blue_pill':
            next_state = 'blueworld'
        else:
            raise ValueError(
                f"unknown action {action!r}; expected 'red_pill' or 'blue_pill'"
            )

        if state != 'redworld' and state != 'blueworld':
            raise ValueError(
                f"unknown state {state!r}; expected 'redworld' or 'blueworld'"
            )

        if state == 'redworld':
            reward = np.random.normal(loc=self.dist_1['mean'], scale=self.dist_1['stdev'])
        else:
            # 'blueworld': first pick the mixture component, then sample it.
            dist = np.random.choice(['dist2a', 'dist2b'], p=[self.dist_2_prob, 1 - self.dist_2_prob])
            params = self.dist_2a if dist == 'dist2a' else self.dist_2b
            reward = np.random.normal(loc=params['mean'], scale=params['stdev'])

        # Rewards are capped at zero (never positive).
        return min(0, reward), next_state, terminal
