'''Script used to play with trained agents.'''

import argparse
import os
import torch

import numpy as np
import yaml

import tonic  # noqa
from matplotlib import pyplot as plt
import cv2

def grabFrame(env, height=600, width=480, camera_id=0):
    '''Renders the environment and returns the frame in OpenCV channel order.

    Args:
        env: Object exposing a `physics` attribute with a `render` method.
        height: First positional argument given to `physics.render`
            (dm_control documents it as the image height - confirm for the
            physics class in use).
        width: Second positional argument given to `physics.render`.
        camera_id: Camera passed to `physics.render`.

    Returns:
        The rendered image with its first and third channels swapped, i.e.
        BGR if the renderer returned RGB.
    '''
    # Get RGB rendering of env
    rgb_frame = env.physics.render(height, width, camera_id=camera_id)
    # Convert to BGR for use with OpenCV. COLOR_RGB2BGR performs the same
    # channel swap as COLOR_BGR2RGB but names the direction actually intended.
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)


# Shared script constant: the number of episodes recorded by
# play_control_suite, and also the slice length applied to the tendon states,
# muscle activations and actions logged at every step.
N = 1
def func(env):
    '''Turns an environment constructor string into an environment builder.

    Args:
        env: Python expression that builds the environment, e.g. a constructor
            call such as `Env('task', scale=1)`.

    Returns:
        A callable. If `env` contains 'ostrich', it takes no argument and
        evaluates `env` unchanged. Otherwise it accepts an optional
        `identifier` (default 0) that is appended as a keyword argument to
        the constructor call before evaluating it.

    Raises:
        ValueError: If `env` needs an identifier but is not a call ending
            with a closing parenthesis.
    '''
    print(env)
    # SECURITY: the string is passed to eval(); only use trusted experiment
    # configurations or command-line values.
    if 'ostrich' in env:
        return lambda: eval(env)

    constructor = env.rstrip()
    if not constructor.endswith(')'):
        raise ValueError(
            f'Environment must be a constructor call ending in ")": {env!r}')

    # Drop the closing parenthesis so one more keyword argument can be added.
    prefix = constructor[:-1].rstrip()
    # An empty argument list or a trailing comma needs no extra separator;
    # adding one would produce a syntax error.
    separator = '' if prefix.endswith(('(', ',')) else ', '

    def build_env(identifier=0):
        return eval(f'{prefix}{separator}identifier={identifier})')
    return build_env


def play_control_suite(agent, environment):
    '''Records an agent acting in a DeepMind Control Suite-based environment.

    Runs `N` episodes using the agent's test policy, writes one rendered frame
    per environment step to `video.mp4` in the working directory and prints
    statistics at the end of every episode.

    Args:
        agent: Object providing `test_step(observations, steps, muscles_dep)`
            and `test_update(observations, rewards, resets, terminations,
            steps)`.
        environment: Tonic environment exposing `unwrapped`, `reset`, `step`,
            `max_episode_steps` and `muscles_dep`.
    '''

    # NOTE(review): unused while the recording loop below replaces the
    # interactive `viewer.launch(environment, policy)`; kept in case the
    # import has side effects that rendering relies on.
    from dm_control import viewer  # noqa: F401

    class Wrapper:
        '''Wrapper giving a Tonic environment a dm_control-like interface.

        Besides forwarding `reset` and `step`, it keeps the latest
        observations for the policy and accumulates episode statistics.
        '''

        def __init__(self, environment):
            self.environment = environment
            self.unwrapped = environment.unwrapped
            self.action_spec = self.unwrapped.environment.action_spec
            self.physics = self.unwrapped.environment.physics
            # Arguments of the next agent.test_update call (None until the
            # first step has been taken).
            self.infos = None
            self.steps = 0
            self.episodes = 0
            self.done = False
            self.min_reward = float('inf')
            self.max_reward = -float('inf')
            # Reward extrema over all episodes; never cleared by reset().
            self.global_min_reward = float('inf')
            self.global_max_reward = -float('inf')

        def reset(self):
            '''Mimics a dm_control reset for the viewer.

            Resets the wrapped environment and the per-episode statistics.

            Returns:
                The `last_time_step` of the unwrapped environment.
            '''
            self.observations = self.environment.reset()[None]
            self.muscles_dep = self.environment.muscles_dep

            self.done = False
            self.score = 0
            self.length = 0
            self.min_reward = float('inf')
            self.max_reward = -float('inf')
            self.max_vel = -100
            self.lce = []
            self.m_act = []
            self.actions = []

            return self.unwrapped.last_time_step

        def step(self, actions):
            '''Mimics a dm_control step for the viewer.

            Args:
                actions: Batched actions; only `actions[0]` is applied to the
                    environment.

            Returns:
                The `last_time_step` of the unwrapped environment.
            '''
            # Fail fast if the agent produced NaN actions.
            assert not np.isnan(actions.sum())
            ob, rew, term, _ = self.environment.step(actions[0])

            # Log the first N entries of each signal for this step.
            self.lce.append(self.physics.tendon_states()[:N].copy())
            self.m_act.append(self.physics.muscle_activations()[:N].copy())
            self.actions.append(actions[:N].copy())

            self.score += rew
            self.length += 1
            self.min_reward = min(self.min_reward, rew)
            self.max_reward = max(self.max_reward, rew)
            self.global_min_reward = min(self.global_min_reward, rew)
            self.global_max_reward = max(self.global_max_reward, rew)
            self.max_vel = max(self.max_vel, self.physics.horizontal_velocity())

            # An episode ends on termination or once the step limit is hit.
            timeout = self.length == self.environment.max_episode_steps
            done = term or timeout

            if done:
                self.episodes += 1
                print()
                print(f'Episodes: {self.episodes:,}')
                print(f'Score: {self.score:,.3f}')
                print(f'Length: {self.length:,}')
                print(f'Terminal: {term:}')
                print(f'Min reward: {self.min_reward:,.3f}')
                print(f'Max reward: {self.max_reward:,.3f}')
                print(f'Global min reward: {self.global_min_reward:,.3f}')
                print(f'Global max reward: {self.global_max_reward:,.3f}')
                print(f'Max velocity: {self.max_vel:,.3f}')

            self.observations = ob[None]
            self.muscles_dep = self.environment.muscles_dep
            self.infos = dict(
                observations=ob[None], rewards=np.array([rew]),
                resets=np.array([done]), terminations=np.array([term]))
            self.done = done
            return self.unwrapped.last_time_step

    # Wrap the environment for the viewer.
    environment = Wrapper(environment)

    def policy(timestep):
        '''Mimics a dm_control policy for the viewer.

        The time step is ignored: the agent acts on the observations stored
        by the wrapper, after being updated with the previous transition.
        '''
        if environment.infos is not None:
            agent.test_update(**environment.infos, steps=environment.steps)
            environment.steps += 1
        return agent.test_step(
            environment.observations, environment.steps,
            environment.muscles_dep)

    # Setup video writer - mp4 at 30 fps, sized from a first rendered frame.
    video_name = 'video.mp4'
    frame = grabFrame(environment)
    height, width, _ = frame.shape
    video = cv2.VideoWriter(
        video_name, cv2.VideoWriter_fourcc(*'mp4v'), 30.0, (width, height))
    try:
        for _ in range(N):
            timestep = environment.reset()
            while True:
                action = policy(timestep)
                timestep = environment.step(action)
                # Render env output to video (one render per step).
                video.write(grabFrame(environment))
                if environment.done:
                    print(environment.physics.data.qpos[0] > 22)
                    print(f'made it {environment.physics.data.qpos[0]} meters')
                    break
    finally:
        # Always finalize the file, even if the rollout raises.
        video.release()
        cv2.destroyAllWindows()



def _find_checkpoint(path, checkpoint):
    '''Returns the checkpoint path to load from an experiment, or None.

    Args:
        path: Experiment directory containing a `checkpoints` folder.
        checkpoint: 'last' to pick the highest step, otherwise the step
            number of the checkpoint to use.

    Returns:
        `<path>/checkpoints/step_<id>` (without file extension), or None
        after logging an error when no matching checkpoint exists.
    '''
    checkpoint_dir = os.path.join(path, 'checkpoints')
    if not os.path.isdir(checkpoint_dir):
        tonic.logger.error(f'{checkpoint_dir} is not a directory')
        # Stop here: listing a missing directory is meaningless, and
        # os.listdir(None) would silently scan the working directory.
        return None

    # List all the checkpoints.
    checkpoint_ids = []
    for file_name in os.listdir(checkpoint_dir):
        if file_name[:5] != 'step_':
            continue
        try:
            checkpoint_ids.append(int(file_name.split('.')[0][5:]))
        except ValueError:
            # Not a `step_<number>` file; ignore it.
            continue

    if not checkpoint_ids:
        tonic.logger.error(f'No checkpoint found in {checkpoint_dir}')
        return None

    # Use the last checkpoint.
    if checkpoint == 'last':
        checkpoint_id = max(checkpoint_ids)
        return os.path.join(checkpoint_dir, f'step_{checkpoint_id}')

    # Use the specified checkpoint.
    checkpoint_id = int(checkpoint)
    if checkpoint_id in checkpoint_ids:
        return os.path.join(checkpoint_dir, f'step_{checkpoint_id}')
    tonic.logger.error(f'Checkpoint {checkpoint_id} '
                       f'not found in {checkpoint_dir}')
    return None


def play(path, checkpoint, seed, header, agent, environment):
    '''Reloads an agent and an environment from a previous experiment.

    Args:
        path: Experiment directory holding `config.yaml` and `checkpoints`.
        checkpoint: 'none' to skip loading weights, 'last' for the highest
            step, or the step number of a specific checkpoint.
        seed: Seed given to the environment and to the agent.
        header: Python code executed before building the agent.
        agent: Python expression building the agent.
        environment: Python expression building the environment.

    Raises:
        ValueError: If no agent or no environment expression is available.
    '''
    # NOTE(review): when an agent is passed, the agent, header and environment
    # overrides are all discarded, so they always come from the experiment
    # configuration. Behavior kept as found - confirm it is intentional.
    if agent is not None:
        agent = None
        header = None
        environment = None
    checkpoint_path = None

    if path:
        tonic.logger.log(f'Loading experiment from {path}')

        # Use no checkpoint, the agent is freshly created.
        if checkpoint == 'none' or agent is not None:
            tonic.logger.log('Not loading any weights')
        else:
            checkpoint_path = _find_checkpoint(path, checkpoint)

        # Load the experiment configuration.
        # SECURITY: FullLoader is not meant for untrusted files; only load
        # configurations produced by your own experiments.
        arguments_path = os.path.join(path, 'config.yaml')
        with open(arguments_path, 'r') as config_file:
            config = yaml.load(config_file, Loader=yaml.FullLoader)
        config = argparse.Namespace(**config)
        print(config)
        header = header or config.header
        agent = agent or config.agent
        # Prefer the test environment when the configuration defines one.
        environment = environment or getattr(config, 'test_environment', None)
        environment = environment or config.environment

    # Run the header first, e.g. to load an ML framework. It is executed in
    # the module namespace so the names it defines are visible to the eval()
    # calls below and in `func` (function-local exec() results are not
    # reliably visible, notably since Python 3.13).
    if header:
        exec(header, globals())

    # Build the agent.
    if not agent:
        raise ValueError('No agent specified.')
    if not environment:
        raise ValueError('No environment specified.')
    agent = eval(agent)

    # Build the environment.
    environment = func(environment)()
    environment.seed(seed)

    # Initialize the agent.
    agent.initialize(
        observation_space=environment.observation_space,
        action_space=environment.action_space, seed=seed)

    # Load the weights of the agent from a checkpoint.
    if checkpoint_path:
        agent.load(checkpoint_path, play=True)

    # Play with the agent in the environment.
    play_control_suite(agent, environment)


if __name__ == '__main__':
    # Command-line entry point: each option maps onto a parameter of `play`.
    option_specs = (
        (('--path',), {}),
        (('--checkpoint',), {'default': 'last'}),
        (('--seed',), {'type': int, 'default': 0}),
        (('--header',), {}),
        (('--agent',), {}),
        (('--environment', '--env'), {}),
    )
    cli = argparse.ArgumentParser()
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    play(**vars(cli.parse_args()))
