import numpy as np
import os
from PIL import Image
from rlkit.torch.data_aug import center_crop
import rlkit.torch.pytorch_util as ptu


def rollout(
    env,
    agent,
    image_embedding_encoder,
    context_encoder,
    max_path_length=np.inf,
    accum_context=True,
    resample_z=False,
    animated=False,
    return_log_prob=False,
    save_frames=False,
    crop=True,
):
    """
    Run ``agent`` in ``env`` for a single episode and return the collected path.

    At every step the current observation is (optionally) center-cropped and
    passed through ``image_embedding_encoder``; the agent acts on the
    resulting embedding. The raw (un-cropped) observation is what is stored
    in the returned path.

    The value for the following keys of the returned dict is an array whose
    first dimension corresponds to the time dimension:
     - observations
     - actions
     - log_pis (empty unless ``return_log_prob`` is True)
     - rewards
     - next_observations
     - terminals

    The next two elements are lists of dictionaries, with the index into
    the list being the index into the time:
     - agent_infos
     - env_infos

    :param env: environment; ``reset``, ``step``, ``render`` and
        ``get_image`` are the methods used here
    :param agent: policy; ``get_action`` and ``update_context`` are the
        methods used here
    :param image_embedding_encoder: callable applied to the batched
        (``unsqueeze(0)``) torch observation to produce the embedding the
        agent acts on
    :param context_encoder: object whose ``get_image_embedding`` maps the
        image embedding to the observation representation stored in the
        agent's context
    :param max_path_length: maximum number of environment steps. The loop
        body must run at least once; otherwise there is no final next
        observation and building ``next_observations`` fails.
    :param accum_context: if True, accumulate the collected context by
        calling ``agent.update_context`` after every step
    :param resample_z: accepted for interface compatibility; not used by
        this function
    :param animated: if True, call ``env.render()`` after reset and after
        every non-terminal step
    :param return_log_prob: if True, ``agent.get_action`` is expected to
        return ``(action, log_pi, agent_info)`` and the log-probabilities
        are collected; the flag itself is not forwarded to the agent
    :param save_frames: if True, store a PIL image of ``env.get_image()``
        under ``env_info['frame']`` at every step
    :param crop: if True, the agent sees ``center_crop`` of a copy of the
        observation instead of the observation itself
    :return: dict with the keys listed above
    """
    observations = []
    log_pis = []
    actions = []
    rewards = []
    terminals = []
    agent_infos = []
    env_infos = []
    o = env.reset()
    next_o = None
    path_length = 0
    if animated:
        env.render()
    while path_length < max_path_length:
        # center_crop is handed a copy, so the raw observation appended to
        # `observations` below is never the array that was cropped.
        agent_o = center_crop(o.copy()) if crop else o
        agent_o_torch = ptu.from_numpy(agent_o)
        # The encoder works on batches: add a batch axis, then strip it again.
        image_embedding = ptu.get_numpy(
            image_embedding_encoder(agent_o_torch.unsqueeze(0)).squeeze(0)
        )
        if return_log_prob:
            a, log_pi, agent_info = agent.get_action(image_embedding)
        else:
            a, agent_info = agent.get_action(image_embedding)
        next_o, r, d, env_info = env.step(a)
        # update the agent's current context
        if accum_context:
            image_embedding_torch = ptu.from_numpy(image_embedding)
            # The context stores the context encoder's view of the image
            # embedding, not the raw observation.
            agent_o = ptu.get_numpy(
                context_encoder.get_image_embedding(
                    image_embedding_torch.unsqueeze(0)
                ).squeeze(0)
            )
            agent.update_context([agent_o, a, r, next_o, d, env_info])
        observations.append(o)
        if return_log_prob:
            log_pis.append(log_pi)
        rewards.append(r)
        terminals.append(d)
        actions.append(a)
        agent_infos.append(agent_info)
        if save_frames:
            # Move axis 0 of the env image to the end and flip it vertically
            # before handing it to PIL.
            image = Image.fromarray(np.flipud(env.get_image().transpose(1, 2, 0)))
            env_info['frame'] = image
        env_infos.append(env_info)
        path_length += 1
        if d:
            break
        o = next_o
        if animated:
            env.render()

    actions = np.array(actions)
    if len(actions.shape) == 1:
        # scalar actions: make them a (T, 1) column
        actions = np.expand_dims(actions, 1)
    observations = np.array(observations)
    log_pis = np.array(log_pis)
    if len(log_pis.shape) == 1:
        log_pis = np.expand_dims(log_pis, 1)
    # next_observations is observations shifted by one step, closed off with
    # the last observation returned by env.step.
    next_observations = np.vstack((observations[1:, :], np.expand_dims(next_o, 0)))
    return dict(
        observations=observations,
        actions=actions,
        log_pis=log_pis,
        rewards=np.array(rewards).reshape(-1, 1),
        next_observations=next_observations,
        terminals=np.array(terminals).reshape(-1, 1),
        agent_infos=agent_infos,
        env_infos=env_infos,
    )


def split_paths(paths):
    """
    Stack the per-path arrays of several paths into one array per quantity.

    :param paths: List of paths, where one path is something returned from
    the rollout function above.
    :return: Tuple ``(rewards, terminals, obs, actions, next_obs)``. Every
    element is asserted to be 2D (batch_size X DIM), including the rewards
    and terminal flags, which are reshaped into columns before stacking.
    """
    # Gather everything first, in the same key order as before, so a missing
    # key surfaces before any stacking is attempted.
    collected = {
        "rewards": [p["rewards"].reshape(-1, 1) for p in paths],
        "terminals": [p["terminals"].reshape(-1, 1) for p in paths],
        "actions": [p["actions"] for p in paths],
        "observations": [p["observations"] for p in paths],
        "next_observations": [p["next_observations"] for p in paths],
    }
    # Order of the returned tuple (differs from the gathering order above).
    output_order = (
        "rewards",
        "terminals",
        "observations",
        "actions",
        "next_observations",
    )
    stacked = tuple(np.vstack(collected[key]) for key in output_order)
    for array in stacked:
        assert len(array.shape) == 2
    return stacked


def split_paths_to_dict(paths):
    """
    Same as ``split_paths``, but return the stacked arrays in a dict.

    :param paths: List of paths, passed straight to ``split_paths``.
    :return: dict with keys ``rewards``, ``terminals``, ``observations``,
    ``actions`` and ``next_observations``.
    """
    # Key order mirrors the tuple order returned by split_paths.
    keys = (
        "rewards",
        "terminals",
        "observations",
        "actions",
        "next_observations",
    )
    return dict(zip(keys, split_paths(paths)))


def get_stat_in_paths(paths, dict_name, scalar_name):
    """
    Collect one statistic from the info entries of every path.

    :param paths: sequence of paths; each path is indexed with ``dict_name``
    :param dict_name: key of the info entry inside each path
        (e.g. the name under which the env/agent infos are stored)
    :param scalar_name: key of the statistic inside the info entry
    :return: ``np.array([[]])`` when ``paths`` is empty; otherwise a list
        with one element per path. If the info entry is a single dict
        (rllab interface) the element is ``path[dict_name][scalar_name]``;
        if it is a sequence of per-step dicts the element is the list of
        ``info[scalar_name]`` over the steps.
    """
    if len(paths) == 0:
        return np.array([[]])

    # isinstance (rather than an exact type comparison) so that dict
    # subclasses such as OrderedDict are also treated as the single-dict
    # layout instead of being iterated key by key.
    if isinstance(paths[0][dict_name], dict):
        # Support rllab interface
        return [path[dict_name][scalar_name] for path in paths]

    return [[info[scalar_name] for info in path[dict_name]] for path in paths]
