import pyvirtualdisplay

# Start an off-screen virtual display before the remaining imports run
# (presumably so the environment can render frames on a headless machine —
# TODO confirm).
import atexit

_display = pyvirtualdisplay.Display(visible=False, size=(1400, 900))
_ = _display.start()
# The display was previously never stopped; shut it down when the interpreter
# exits so the display server process does not outlive this script.
atexit.register(_display.stop)

from FQE import FQE, initialize_zero
from FQE_utils import *
from NN import PolicyNet, QNet, net_param_num
from PG import PG
from PG_utils import CartPoleEnvR, set_seed
import gym
import torch
import numpy as np

# Build the environment and the image preprocessing pipeline handed to the
# sampler.
env = CartPoleEnvR()
# NOTE(review): `T` and `Image` are not imported explicitly in this script;
# presumably they arrive through `from FQE_utils import *` as
# torchvision.transforms and PIL.Image — confirm.
# Image.BICUBIC is the same filter as the legacy alias Image.CUBIC, which
# Pillow 10 removed, so this spelling works on old and new Pillow alike.
resize = T.Compose([
    T.ToPILImage(),
    T.Resize(40, interpolation=Image.BICUBIC),
    T.ToTensor(),
])  # input 40 for 3x40x150; input 20 for 3x20x75
n_state = env.observation_space.shape[0]
n_action = env.action_space.n

# Restore the target policy from its saved state dict.
policy_net = PolicyNet(n_state, n_action)
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine. torch.load unpickles the file, so only point this at a
# checkpoint from a trusted source.
policy_net.load_state_dict(
    torch.load("target_policy_net.pickle", map_location="cpu")
)
m = net_param_num(policy_net)

# set seed_init
seed_init = 1000
set_seed(env, seed_init)

# NOTE(review): K and H are presumably the sample count and the horizon
# expected by sample_iid_visual — confirm against its signature.
K = 20000
H = 100
eps = 0     # this is the epsilon-greedy level of behavior policy
# NOTE(review): the output prefix lives under data/; confirm that
# sample_iid_visual creates that directory or that it already exists.
sample_iid_visual(env, resize, policy_net, eps, K, 1, H, f"data/IID_eps-{eps}_K-{K}")
