'''Script used to play with trained agents.'''

import argparse
import os
import torch

import numpy as np
import yaml

import tonic  # noqa
from matplotlib import pyplot as plt 
# Default number of evaluation episodes run by play_control_suite.
N = 100
def func(env):
    '''Turns an environment expression string into an environment builder.

    Args:
        env: Python source of a constructor call, e.g. `'Foo(a=1)'`. It is
            printed once here and evaluated every time the returned builder
            is called.

    Returns:
        A callable building the environment. If `env` contains `'ostrich'`
        the callable takes no argument and evaluates `env` unchanged.
        Otherwise it accepts an `identifier` (default 0) which is injected
        as the last keyword argument of the call before evaluation.

    Raises:
        ValueError: When the returned builder is called and `env` is not a
            call expression ending with `)`.
    '''
    print(env)

    # NOTE(security): `env` is evaluated as Python code below; only pass
    # trusted strings (your own command line or experiment configuration).
    if 'ostrich' in env:
        return lambda: eval(env)

    def build_env(identifier=0):
        call = env.rstrip()
        if not call.endswith(')'):
            raise ValueError(
                f'Expected a call expression ending with ")", got {env!r}')

        # Drop the closing parenthesis so one more keyword can be appended.
        head = call[:-1].rstrip()
        # An empty argument list ("Foo(") or a trailing comma ("Foo(a=1,")
        # must not be followed by another comma, or the source is invalid.
        separator = '' if head.endswith(('(', ',')) else ', '
        return eval(f'{head}{separator}identifier={identifier})')
    return build_env


def play_control_suite(agent, environment, obs_height, episodes=None,
                       descriptor='td4_no_correction'):
    '''Runs an agent for several episodes in a Control Suite-based environment.

    The environment is wrapped so that it exposes dm_control-style `reset`
    and `step` methods, then the agent is stepped through `episodes`
    episodes without launching the interactive viewer. An episode is
    recorded as a success when `physics.data.qpos[0]` exceeds 10 at its end.
    The per-episode success flags are saved to
    `stepdown_{descriptor}_{obs_height}.npy` in the working directory.

    Args:
        agent: Agent providing `test_step` and `test_update`.
        environment: Tonic environment; must expose `unwrapped`,
            `muscles_dep` and `max_episode_steps`.
        obs_height: Identifier of the evaluated configuration; only used in
            the name of the saved file.
        episodes: Number of episodes to run. Defaults to the module-level
            constant `N`.
        descriptor: Tag inserted in the name of the saved file.

    Returns:
        The mean of the per-episode success flags.
    '''

    # NOTE(review): the interactive viewer is not launched by this function;
    # the import is kept so import-time behavior is unchanged. Remove it if
    # the viewer is definitely not coming back.
    from dm_control import viewer  # noqa: F401

    class Wrapper:
        '''Wrapper used to plug a Tonic environment in a dm_control viewer.'''

        def __init__(self, environment):
            self.environment = environment
            self.unwrapped = environment.unwrapped
            self.action_spec = self.unwrapped.environment.action_spec
            self.physics = self.unwrapped.environment.physics
            # Keyword arguments for the next agent.test_update call; None
            # until the first step has been taken.
            self.infos = None
            self.steps = 0
            self.episodes = 0
            self.done = False
            self.min_reward = float('inf')
            self.max_reward = -float('inf')
            self.global_min_reward = float('inf')
            self.global_max_reward = -float('inf')

        def reset(self):
            '''Mimics a dm_control reset for the viewer.'''
            # Add a leading axis of size one to the observation.
            self.observations = self.environment.reset()[None]
            self.muscles_dep = self.environment.muscles_dep

            # Per-episode statistics; the global ones live across episodes.
            self.score = 0
            self.length = 0
            self.done = False
            self.min_reward = float('inf')
            self.max_reward = -float('inf')
            self.max_vel = -100

            # Perturb the last entry of qpos with Gaussian noise (std 0.2),
            # presumably to randomize the initial conditions.
            # NOTE(review): the observation stored above was produced before
            # this perturbation.
            self.physics.data.qpos[-1] += np.random.normal(0, 0.2)

            return self.unwrapped.last_time_step

        def step(self, actions):
            '''Mimics a dm_control step for the viewer.'''

            assert not np.isnan(actions.sum())
            # The agent returns actions with a leading axis; strip it.
            ob, rew, term, _ = self.environment.step(actions[0])
            self.score += rew
            self.length += 1
            self.min_reward = min(self.min_reward, rew)
            self.max_reward = max(self.max_reward, rew)
            self.global_min_reward = min(self.global_min_reward, rew)
            self.global_max_reward = max(self.global_max_reward, rew)
            velocity = self.physics.horizontal_velocity()
            if velocity > self.max_vel:
                self.max_vel = velocity
            timeout = self.length == self.environment.max_episode_steps
            done = term or timeout

            if done:
                self.episodes += 1
                print()
                print(f'Episodes: {self.episodes:,}')
                print(f'Score: {self.score:,.3f}')
                print(f'Length: {self.length:,}')
                print(f'Terminal: {term:}')
                print(f'Min reward: {self.min_reward:,.3f}')
                print(f'Max reward: {self.max_reward:,.3f}')
                # Report the statistics accumulated over all episodes, not
                # the per-episode ones printed just above.
                print(f'Global min reward: {self.global_min_reward:,.3f}')
                print(f'Global max reward: {self.global_max_reward:,.3f}')
                print(f'Max velocity: {self.max_vel:,.3f}')
                print(f'Successfully passed obstacles: {self.physics.data.qpos[0] > 20}')

            self.observations = ob[None]
            self.muscles_dep = self.environment.muscles_dep
            self.infos = dict(
                observations=ob[None], rewards=np.array([rew]),
                resets=np.array([done]), terminations=np.array([term]))
            self.done = done
            return self.unwrapped.last_time_step

    # Wrap the environment so it can be driven like a dm_control one.
    wrapped = Wrapper(environment)

    def policy(timestep):
        '''Mimics a dm_control policy for the viewer.'''
        # The time step itself is not used: the agent reads the observations
        # cached on the wrapper. Feed back the previous transition first.
        if wrapped.infos is not None:
            agent.test_update(**wrapped.infos, steps=wrapped.steps)
            wrapped.steps += 1
        return agent.test_step(
            wrapped.observations, wrapped.steps, wrapped.muscles_dep)

    if episodes is None:
        episodes = N

    # NOTE(review): three different distance thresholds are in use (20 in
    # the episode summary, 22 in the flag printed below, 10 for the recorded
    # success) -- confirm which one is intended.
    successes = []
    for _ in range(episodes):
        timestep = wrapped.reset()
        while True:
            timestep = wrapped.step(policy(timestep))
            if wrapped.done:
                distance = wrapped.physics.data.qpos[0]
                print(distance > 22)
                print(f'made it {distance} meters')
                successes.append(distance > 10)
                break

    np.save(f'stepdown_{descriptor}_{obs_height}.npy', successes)
    return np.mean(successes)


def _find_checkpoint(path, checkpoint):
    '''Locates the checkpoint to load inside an experiment directory.

    Args:
        path: Experiment directory, expected to contain a `checkpoints`
            folder.
        checkpoint: Either `'last'` to pick the highest step, or the step
            number of the checkpoint to use.

    Returns:
        `<path>/checkpoints/step_<id>` (no file extension), or None when the
        folder is missing or holds no matching checkpoint; the reason is
        reported through `tonic.logger.error`.

    Raises:
        ValueError: If `checkpoint` is neither `'last'` nor an integer, or
            if a `step_*` entry does not carry an integer step.
    '''
    checkpoint_dir = os.path.join(path, 'checkpoints')
    if not os.path.isdir(checkpoint_dir):
        # Stop here: listing a None path would silently scan the current
        # working directory instead.
        tonic.logger.error(f'{checkpoint_dir} is not a directory')
        return None

    # Collect the step numbers from names such as 'step_<id>.<ext>'.
    checkpoint_ids = [
        int(name.split('.')[0][5:])
        for name in os.listdir(checkpoint_dir)
        if name.startswith('step_')]
    if not checkpoint_ids:
        tonic.logger.error(f'No checkpoint found in {checkpoint_dir}')
        return None

    if checkpoint == 'last':
        checkpoint_id = max(checkpoint_ids)
    else:
        checkpoint_id = int(checkpoint)
        if checkpoint_id not in checkpoint_ids:
            tonic.logger.error(f'Checkpoint {checkpoint_id} '
                               f'not found in {checkpoint_dir}')
            return None

    return os.path.join(checkpoint_dir, f'step_{checkpoint_id}')


def play(path, checkpoint, seed, header, agent, environment):
    '''Reloads an agent and an environment from a previous experiment.

    The agent is built and initialized once, then evaluated with
    `play_control_suite` on one freshly built environment per obstacle
    height.

    Args:
        path: Experiment directory holding `config.yaml` and, optionally, a
            `checkpoints` folder. May be empty/None if `agent` and
            `environment` are given explicitly.
        checkpoint: `'last'`, `'none'`, or the step number of the checkpoint
            to load.
        seed: Seed passed to the environments and to the agent.
        header: Python source executed before building the agent, e.g. to
            import an ML framework. Falls back to the configuration.
        agent: Python expression building the agent. Falls back to the
            configuration; when given, no weights are loaded.
        environment: Python expression (a call ending with `)`) building the
            environment. Falls back to the configuration.

    Raises:
        ValueError: If no agent or no environment expression is available.
    '''

    checkpoint_path = None

    if path:
        tonic.logger.log(f'Loading experiment from {path}')

        # Use no checkpoint, the agent is freshly created.
        if checkpoint == 'none' or agent is not None:
            tonic.logger.log('Not loading any weights')
        else:
            checkpoint_path = _find_checkpoint(path, checkpoint)

        # Load the experiment configuration.
        # NOTE(security): the header, agent and environment strings read
        # from this file are executed below; only load trusted experiments.
        arguments_path = os.path.join(path, 'config.yaml')
        with open(arguments_path, 'r') as config_file:
            config = yaml.load(config_file, Loader=yaml.FullLoader)
        config = argparse.Namespace(**config)
        print(config)
        header = header or config.header
        agent = agent or config.agent
        environment = environment or config.test_environment
        environment = environment or config.environment

    # Fail early with a clear message rather than on string slicing below.
    if not agent:
        raise ValueError('No agent specified.')
    if not environment:
        raise ValueError('No environment specified.')

    # Run the header first, e.g. to load an ML framework.
    if header:
        exec(header)

    # Build the agent.
    agent = eval(agent)

    # Build one environment per obstacle height and evaluate the agent in it.
    env_old = environment
    init = False
    # NOTE(review): '02' appears twice, so that configuration is evaluated
    # twice and its result file is overwritten -- confirm this is intended.
    for obs_height in ['02', '005', '01', '015', '02', '025']:
        # Re-open the call expression to append the height as a keyword.
        # This assumes the expression ends with ')' and already has at least
        # one argument.
        environment = env_old[:-1]
        environment = environment + (f', environment_kwargs=dict(obs_height="{obs_height}"))')
        environment = func(environment)()
        environment.seed(seed)
        if not init:
            # Initialize the agent once, from the spaces of the first
            # environment.
            agent.initialize(
                observation_space=environment.observation_space,
                action_space=environment.action_space, seed=seed)

            # Load the weights of the agent from a checkpoint.
            if checkpoint_path:
                agent.load(checkpoint_path, play=True)
            init = True

        # Play with the agent in the environment.
        play_control_suite(agent, environment, obs_height)


if __name__ == '__main__':
    # Command-line options as (flags, settings) pairs. They are registered
    # in this order and the parsed values are forwarded to play() as
    # keyword arguments.
    cli_options = (
        (('--path',), {}),
        (('--checkpoint',), {'default': 'last'}),
        (('--seed',), {'type': int, 'default': 0}),
        (('--header',), {}),
        (('--agent',), {}),
        (('--environment', '--env'), {}),
    )
    parser = argparse.ArgumentParser()
    for flags, settings in cli_options:
        parser.add_argument(*flags, **settings)
    play(**vars(parser.parse_args()))
