import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import argparse
import gym
import numpy as np
import pickle

# Module-level cap on the number of environment steps per episode.
# NOTE(review): this duplicates both the --max_ep_len CLI default and the
# 'max_ep_len' entry handed to Simulator via params; prefer a single source
# of truth so the two values cannot drift apart.
max_ep_len = 1000
# Evaluate with no shift
class Simulator(object):
    """Roll out a pickled policy in a Gym environment and report its returns.

    The code targets the classic Gym API: ``env.reset()`` returns the initial
    observation and ``env.step(action)`` returns a 4-tuple
    ``(observation, reward, done, info)``.

    Args:
        params: Mapping with the keys ``'max_ep_len'`` (maximum number of
            steps per episode), ``'env_name'`` (id passed to ``gym.make``),
            ``'num_episodes'`` (how many episodes to simulate) and
            ``'render'`` (whether to call ``env.render()`` on every step).
    """

    def __init__(self, params):
        self.max_ep_len = params['max_ep_len']
        self.env_name = params['env_name']
        self.num_episodes = params['num_episodes']
        self.render = params['render']
        self.load_policy()

    def simulate_model(self):
        """Simulate ``num_episodes`` episodes and print mean/SD of the returns.

        Returns:
            A NumPy array of length ``num_episodes`` holding the undiscounted
            return of each episode.
        """
        # Create new env for each child process
        env = gym.make(self.env_name)
        rewards = np.zeros(self.num_episodes)

        try:
            for episode in range(self.num_episodes):
                rewards[episode] = self._run_episode(env)
            print(f"Simulated Mean: {np.mean(rewards)}, Simulated SD: {np.std(rewards)}")
        finally:
            # Release the environment (and any render window) even when the
            # policy or the environment raises mid-rollout.
            env.close()
        return rewards

    def _run_episode(self, env):
        """Run one episode on ``env`` and return its accumulated reward.

        The episode stops when the environment reports ``done`` or after
        ``self.max_ep_len`` steps, whichever happens first. The reward
        collected so far is returned in both cases, so an episode cut off by
        the step cap is not reported as zero.
        """
        state = env.reset()
        ep_reward = 0
        # Use the instance's cap (not a module-level constant) so the value
        # supplied through params is actually honoured.
        for _ in range(self.max_ep_len):
            if self.render:
                env.render()

            action = self.policy.get_action(state)
            state, reward, done, _info = env.step(action)

            ep_reward += reward
            if done:
                break
        return ep_reward

    def load_policy(self):
        """Load the pickled policy for ``env_name`` into ``self.policy``.

        The policy is read from
        ``./TrainedPolicies/<env_name>_finalModel.obj`` relative to the
        current working directory.
        """
        # SECURITY NOTE: pickle.load can execute arbitrary code embedded in
        # the file; only load policy files that come from a trusted source.
        with open(f'./TrainedPolicies/{self.env_name}_finalModel.obj', 'rb') as policy_file:
            self.policy = pickle.load(policy_file)

if __name__ == "__main__":
    def _str2bool(value):
        """Parse a command-line boolean such as 'True', 'false', '1' or 'no'.

        argparse's ``type=bool`` calls ``bool()`` on the raw string, which is
        True for every non-empty value (including 'False'), so the text has
        to be interpreted explicitly.
        """
        text = str(value).strip().lower()
        if text in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if text in ('0', 'false', 'f', 'no', 'n', 'off', ''):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str, default="LunarLanderContinuous-v2")
    parser.add_argument('--max_ep_len', type=int, default=1000)
    parser.add_argument('--num_episodes', type=int, default=100)
    # Accepts `--render`, `--render True` and `--render False`; a bare
    # `--render` enables rendering via const=True.
    parser.add_argument('--render', type=_str2bool, nargs='?', const=True, default=False)

    args = parser.parse_args()
    # Simulator expects a plain dict keyed by the option names.
    params = vars(args)

    sim = Simulator(params)

    sim.simulate_model()