import numpy as np
import gym
from gym import spaces

class TwoState(gym.Env):
    """Minimal stochastic environment with two states and two actions.

    Dynamics, as implemented in ``step``:

    * action 0 yields reward 0.122; the next state is 0 when a uniform
      draw is <= 0.66, otherwise 1.
    * action 1 yields reward 0.033; the next state is 0 when a uniform
      draw is <= 0.99, otherwise 1.

    The next state does not depend on the current one. An episode is
    reported as terminal as soon as the state becomes 1.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self):
        # Two discrete actions (0, 1) and two discrete states (0, 1).
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Discrete(2)
        # Current state; defined here so the attribute exists before reset().
        self.pos = 0

    def reset(self):
        """Put the environment back in state 0 and return that state."""
        self.pos = 0
        return self.pos

    def step(self, action):
        """Apply ``action`` and sample the next state.

        Args:
            action: 0 or 1.

        Returns:
            A ``(state, reward, terminal, info)`` tuple where ``terminal``
            is True when the new state is 1 and ``info`` is an empty dict.

        Raises:
            AssertionError: if ``action`` is neither 0 nor 1.
        """
        assert action in (0, 1), "action must be 0 or 1, got %r" % (action,)

        if action == 0:
            reward, p_state0 = 0.122, 0.66
        else:
            # NOTE(review): the original inline comment here read "back to
            # state 1", but the code moves to state 0 on this draw. The
            # behaviour is kept unchanged; confirm which one is intended.
            reward, p_state0 = 0.033, 0.99

        # Exactly one uniform draw per step decides the next state.
        self.pos = 0 if np.random.random() <= p_state0 else 1

        terminal = self.pos == 1
        return self.pos, reward, terminal, {}

    def numactions(self):
        """Return the number of available actions."""
        # The original read ``self.nactions``, which is never assigned and
        # therefore raised AttributeError; the action space is the source
        # of truth for the action count.
        return self.action_space.n

    def render(self, mode='human'):
        """No-op: this environment has nothing to draw."""
        pass

    def close(self):
        """No-op: this environment holds no resources to release."""
        pass
