import numpy as np

from train_bc import RunnerBC
from train_uncertainty import RunnerUncertainty
from train_rl import RunnerRL

def collect_episode(env, actor, atari=False):
    """Roll out one episode of ``actor`` in ``env`` and record its transitions.

    The environment is reset and then stepped until the returned timestep
    reports ``last()``. The action for the following step is chosen from the
    newest observation *before* the transition is recorded, so every recorded
    row pairs a current value with its successor.

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``; both must
            return a timestep with ``observation``, ``reward`` and ``last()``.
            When ``atari`` is true, ``env._env.lives`` is read as well.
        actor: Policy object exposing ``select_action(observation)``.
        atari: If true, watch the life counter and force action ``1`` on the
            step that follows a lost life (presumably the FIRE action that
            restarts play -- TODO confirm against the wrapped environment).

    Returns:
        A tuple ``(observations, actions, rewards)`` of NumPy arrays holding
        one row per environment step: ``[s, s_next]``, ``[a, a_next]`` and
        ``[reward]`` respectively. All three are empty when the timestep
        returned by ``reset()`` is already the last one.
    """
    observation_list, action_list, reward_list = [], [], []

    # Reset the environment and pick the first action from the initial state.
    timestep = env.reset()
    s = timestep.observation
    a = actor.select_action(s)

    if atari:
        lives = env._env.lives

    # Run an episode.
    while not timestep.last():
        if atari and env._env.lives < lives:
            # A life was lost on the previous step: print the life count held
            # before the loss and override the policy's choice with action 1.
            # The previously recorded row keeps the policy's choice as its
            # "next action"; only the row recorded below sees the override.
            print(lives)
            a = 1
            lives = env._env.lives

        # Step the environment, then query the policy on the new observation.
        timestep = env.step(a)
        sp = timestep.observation
        ap = actor.select_action(sp)

        observation_list.append([s, sp])
        action_list.append([a, ap])
        reward_list.append([timestep.reward])

        # The successor pair becomes the current pair for the next step.
        s, a = sp, ap

    return np.array(observation_list), np.array(action_list), np.array(reward_list)


def load_rl_runner(load_path, step, params):
    """Build a ``RunnerRL`` from ``params`` and load saved state into it.

    Args:
        load_path: Forwarded unchanged to ``RunnerRL.load``.
        step: Forwarded unchanged to ``RunnerRL.load``; presumably selects
            which saved training step to restore -- TODO confirm.
        params: Constructor argument for ``RunnerRL``.

    Returns:
        The ``RunnerRL`` instance after ``load`` has been called on it.
    """
    runner = RunnerRL(params)
    runner.load(load_path, step)
    return runner

def load_bc_runner(load_path, step, params):
    """Build a ``RunnerBC`` from ``params`` and load saved state into it.

    Args:
        load_path: Forwarded unchanged to ``RunnerBC.load``.
        step: Forwarded unchanged to ``RunnerBC.load``; presumably selects
            which saved training step to restore -- TODO confirm.
        params: Constructor argument for ``RunnerBC``.

    Returns:
        The ``RunnerBC`` instance after ``load`` has been called on it.
    """
    runner = RunnerBC(params)
    runner.load(load_path, step)
    return runner

def load_unc_runner(load_path, step, params):
    """Build a ``RunnerUncertainty`` from ``params`` and load saved state.

    Args:
        load_path: Forwarded unchanged to ``RunnerUncertainty.load``.
        step: Forwarded unchanged to ``RunnerUncertainty.load``; presumably
            selects which saved training step to restore -- TODO confirm.
        params: Constructor argument for ``RunnerUncertainty``.

    Returns:
        The ``RunnerUncertainty`` instance after ``load`` has been called on
        it.
    """
    runner = RunnerUncertainty(params)
    runner.load(load_path, step)
    return runner

