import os
import random
import numpy as np

class Environment:
    """Small stochastic environment with two states and two action indices.

    A step takes a pair of action indices ``(a, b)``; ``action_size`` lists
    two choices for each.  The reward is looked up in ``R`` as
    ``R[state][a][b]`` using the state *before* the transition.  The next
    state is then drawn at random, with the odds depending only on whether
    the action pair is ``(0, 1)``.
    """

    def __init__(self):
        # Dimensions of the problem.
        self.state_size = 2
        self.action_size = [2, 2]

        # Reward table indexed as R[state][a][b]; state 1 pays nothing.
        self.R = np.array([
            [[-2.0, 5.0], [2.0, -2.0]],
            [[0.0, 0.0], [0.0, 0.0]],
        ])
        # Equals the largest entry of R.
        self.R_max = 5.0

        # Cut-off compared against a uniform draw in step().
        self.prob_threshold = 0.1

        self.current_state = 0

    def get_current_state(self):
        """Return the index of the state the environment is currently in."""
        return self.current_state

    def init_environment(self):
        """Reset to state 0 or state 1, each chosen with probability 0.5."""
        draw = np.random.uniform(0, 1)
        self.current_state = 0 if draw < 0.5 else 1

    def step(self, a, b):
        """Apply the action pair ``(a, b)`` and advance the environment.

        The reward is read from ``R`` for the current state first.  Then a
        single uniform draw on [0, 1) picks the next state:

        * for ``(a, b) == (0, 1)`` state 1 is entered when the draw is below
          ``prob_threshold``, otherwise state 0;
        * for every other pair the rule is mirrored: state 1 when the draw
          is at or above ``prob_threshold``, otherwise state 0.

        Returns:
            A tuple ``(next_state, reward)``.
        """
        payoffs = self.R[self.current_state]
        reward = payoffs[a][b]

        draw = np.random.uniform(0, 1)
        if a == 0 and b == 1:
            enters_state_one = draw < self.prob_threshold
        else:
            enters_state_one = draw >= self.prob_threshold

        self.current_state = 1 if enters_state_one else 0
        return self.current_state, reward



