import os
import random
import numpy as np

class Environment:
    """Two-state environment driven by the joint action of ``num_agent`` agents.

    The "target" joint action is agent 0 picking action 0 while every other
    agent picks action 1. Both the reward and the state transition depend
    only on whether the joint action matches that target.

    NOTE(review): ``get_reward`` yields zero when the current state is 1,
    whereas ``step`` only pays a reward when the state *before* the
    transition is 1. The two look inconsistent -- confirm which is intended.
    """

    def __init__(self):
        self.num_agent = 6
        self.state_size = 2
        # One entry per agent; every agent has two actions (0 and 1).
        self.action_size = [2 for _ in range(self.num_agent)]
        # Reward magnitude used by both get_reward() and step().
        self.R_max = 5.0
        self.current_state = 0

        # Compared against a uniform draw in step() to select the next state.
        self.prob_threshold = 0.1

    def get_current_state(self):
        """Return the current state (0 or 1)."""
        return self.current_state

    def get_reward(self, pi):
        """Return ``R_max`` scaled by the weight ``pi`` puts on the target action.

        Args:
            pi: Indexed as ``pi[agent][action]``; presumably each agent's
                action probabilities -- verify against the caller.

        Returns:
            ``0.0`` when the current state is 1, otherwise
            ``R_max * pi[0][0] * pi[1][1] * ... * pi[num_agent - 1][1]``.
        """
        if self.current_state == 1:
            return 0.0

        # Multiply in agent order so floating-point results stay reproducible.
        prob = pi[0][0]
        for agent in range(1, self.num_agent):
            prob *= pi[agent][1]
        return self.R_max * prob

    def init_environment(self):
        """Reset the current state to 0 or 1 based on a uniform draw vs. 0.5."""
        x = np.random.uniform(0, 1)
        self.current_state = 0 if x < 0.5 else 1

    def step(self, a):
        """Apply joint action ``a`` and advance the environment by one step.

        Args:
            a: Indexable of per-agent actions; ``a[0]`` and
                ``a[1] .. a[num_agent - 1]`` are read.

        Returns:
            A ``(next_state, reward)`` tuple. The reward is ``R_max`` when the
            state before the transition is 1 and ``a`` is the target joint
            action, and ``0.0`` otherwise.
        """
        is_target = a[0] == 0 and all(
            a[agent] == 1 for agent in range(1, self.num_agent)
        )

        # The reward is based on the state *before* the transition. It uses
        # R_max rather than a literal so it stays in sync with get_reward().
        r = self.R_max if (self.current_state == 1 and is_target) else 0.0

        # Exactly one draw per step. The target action moves to state 1 when
        # the draw falls below the threshold; any other joint action moves to
        # state 1 when the draw is at or above it.
        x = np.random.uniform(0, 1)
        if is_target:
            next_is_one = x < self.prob_threshold
        else:
            next_is_one = x >= self.prob_threshold
        self.current_state = 1 if next_is_one else 0

        return self.current_state, r



