import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from models.ppo.distributions import Bernoulli, Categorical, DiagGaussian
from models.ppo.utils import init


class Flatten(nn.Module):
    """Reshape a tensor to 2-D, keeping the first dimension and merging the rest."""

    def forward(self, x):
        """Return ``x`` viewed as ``(x.size(0), -1)``."""
        leading = x.size(0)
        return x.view(leading, -1)


class Policy(nn.Module):
    """Actor-critic policy: a named base network followed by an action distribution head.

    Args:
        obs_shape: Forwarded unchanged as the first argument of the base constructor.
        action_space: Action space object. Its class name must be ``"Discrete"``,
            ``"Box"`` or ``"MultiBinary"``; it selects the distribution head.
        config: Forwarded unchanged as the second argument of the base constructor.
        base: String key selecting the base network (e.g. ``"kuka_RSI1"``).
        base_kwargs: Optional extra keyword arguments for the base constructor.

    Raises:
        NotImplementedError: If ``base`` is None or the action space is unsupported.
        KeyError: If ``base`` is not one of the known base names.
    """

    def __init__(self, obs_shape, action_space, config=None, base=None, base_kwargs=None):
        super(Policy, self).__init__()
        if base_kwargs is None:
            base_kwargs = {}

        # Fail fast before touching the project-local model modules.
        if base is None:
            raise NotImplementedError("Policy requires a `base` name; no default base is provided")

        # Imported locally, as in the original implementation.
        from ..RL.kuka_RL_model import kukaNet_RSI1, kukaNet_RSI2, kukaNet_RSI3
        from ..RL.turtlebot_RL_model import turtlebotNet_RSI1, turtlebotNet_RSI2, turtlebotNet_RSI3
        from ..RL.ai2thor_RL_model import ai2thorNet_RSI1, ai2thorNet_RSI2, ai2thorNet_RSI3
        base_func = {
            'kuka_RSI1': kukaNet_RSI1, 'kuka_RSI2': kukaNet_RSI2, 'kuka_RSI3': kukaNet_RSI3,
            # The kinova key is mapped to the kuka RSI2 network.
            'kinova_RSI2': kukaNet_RSI2,
            'turtlebot_RSI1': turtlebotNet_RSI1, 'turtlebot_RSI2': turtlebotNet_RSI2, 'turtlebot_RSI3': turtlebotNet_RSI3,
            'ai2thor_RSI1': ai2thorNet_RSI1, 'ai2thor_RSI2': ai2thorNet_RSI2, 'ai2thor_RSI3': ai2thorNet_RSI3,
        }
        if base not in base_func:
            raise KeyError(
                "unknown base {!r}; expected one of {}".format(base, sorted(base_func)))
        self.base = base_func[base](obs_shape, config, **base_kwargs)

        # The distribution head is chosen from the action space's class name.
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.base.output_size, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.base.output_size, num_outputs)
        elif space_kind == "MultiBinary":
            num_outputs = action_space.shape[0]
            self.dist = Bernoulli(self.base.output_size, num_outputs)
        else:
            raise NotImplementedError(
                "unsupported action space type: {}".format(space_kind))

    @property
    def is_recurrent(self):
        """Whether the underlying base reports itself as recurrent."""
        return self.base.is_recurrent

    @property
    def recurrent_hidden_state_size(self):
        """Size of rnn_hx."""
        return self.base.recurrent_hidden_state_size

    def forward(self, inputs, rnn_hxs, masks):
        """Not supported; use ``act``, ``get_value`` or ``evaluate_actions``."""
        raise NotImplementedError

    def act(self, inputs, rnn_hxs, masks, deterministic=False):
        """Choose an action for ``inputs``.

        Args:
            inputs: Observations passed to the base.
            rnn_hxs: Recurrent hidden states passed to the base.
            masks: Masks passed to the base.
            deterministic: If true use the distribution's mode, otherwise sample.

        Returns:
            Tuple ``(value, action, action_log_probs, rnn_hxs)`` where ``rnn_hxs``
            is the hidden state returned by the base.
        """
        value, actor_features, rnn_hxs, _ = self.base(inputs, rnn_hxs, masks)
        dist = self.dist(actor_features)

        action = dist.mode() if deterministic else dist.sample()
        action_log_probs = dist.log_probs(action)

        # Entropy is not part of the return value, so it is not computed here.
        return value, action, action_log_probs, rnn_hxs

    def get_value(self, inputs, rnn_hxs, masks):
        """Return only the value output of the base for ``inputs``."""
        value, _, _, _ = self.base(inputs, rnn_hxs, masks)
        return value

    def evaluate_actions(self, inputs, rnn_hxs, masks, action):
        """Score given actions under the current policy.

        The base is called with ``infer=False``.

        Returns:
            Tuple ``(value, action_log_probs, dist_entropy, rnn_hxs, additional)``
            where ``dist_entropy`` is the mean entropy of the action distribution
            and ``additional`` is the fourth output of the base, passed through.
        """
        value, actor_features, rnn_hxs, additional = self.base(inputs, rnn_hxs, masks, infer=False)
        dist = self.dist(actor_features)

        action_log_probs = dist.log_probs(action)
        dist_entropy = dist.entropy().mean()

        return value, action_log_probs, dist_entropy, rnn_hxs, additional


class NNBase(nn.Module):
    """Common base for policy networks, with an optional GRU.

    Args:
        recurrent: If truthy, an ``nn.GRU`` is created as ``self.gru``.
        recurrent_input_size: Input feature size of the GRU.
        recurrent_size: Hidden size of the GRU.
        action_hidden_size: Value reported by ``output_size``.
    """

    def __init__(self, recurrent, recurrent_input_size, recurrent_size, action_hidden_size):
        super(NNBase, self).__init__()

        self._recurrent = recurrent
        self._recurrent_size = recurrent_size
        self._action_hidden_size = action_hidden_size
        # First CUDA device when available, otherwise the CPU.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        if not recurrent:
            return

        self.gru = nn.GRU(recurrent_input_size, recurrent_size)
        # Zero the biases and use orthogonal initialisation for the weights.
        for param_name, tensor in self.gru.named_parameters():
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0)
            elif 'weight' in param_name:
                nn.init.orthogonal_(tensor)

    @property
    def is_recurrent(self):
        """The ``recurrent`` flag given at construction."""
        return self._recurrent

    @property
    def recurrent_hidden_state_size(self):
        """GRU hidden size when recurrent, otherwise 1."""
        return self._recurrent_size if self._recurrent else 1

    @property
    def output_size(self):
        """The ``action_hidden_size`` given at construction."""
        return self._action_hidden_size

    def _forward_gru(self, x, hxs, masks):
        """Run the GRU over one step or over a flattened sequence.

        When ``x`` and ``hxs`` have the same first dimension, the input is
        treated as a single step. Otherwise ``x`` is treated as a (T, N, -1)
        tensor flattened to (T * N, -1), where N is ``hxs.size(0)``. The hidden
        state is multiplied by the mask before each GRU call, so a zero mask
        resets that entry's hidden state.

        Returns:
            Tuple ``(x, hxs)``: GRU outputs (flattened back to (T * N, -1) in
            the sequence case) and the final hidden state.
        """
        if x.size(0) == hxs.size(0):
            # Single step: add and remove the sequence dimension around the GRU.
            step_out, step_hidden = self.gru(x.unsqueeze(0), (hxs * masks).unsqueeze(0))
            return step_out.squeeze(0), step_hidden.squeeze(0)

        num_envs = hxs.size(0)
        num_steps = x.size(0) // num_envs

        # Restore the (T, N, ...) layout of inputs and masks.
        seq = x.view(num_steps, num_envs, x.size(1))
        step_masks = masks.view(num_steps, num_envs)

        # Steps after t=0 at which any entry has a zero mask. The offset of
        # one corrects for slicing away step 0.
        reset_rows = (step_masks[1:] == 0.0).any(dim=-1).nonzero()
        reset_steps = (reset_rows.flatten().cpu() + 1).tolist()

        # t=0 is always treated as a boundary; T closes the last segment.
        boundaries = [0] + reset_steps + [num_steps]

        hidden = hxs.unsqueeze(0)
        segments = []
        # Steps between two boundaries contain no zero masks, so each such
        # segment is processed by a single GRU call.
        for begin, end in zip(boundaries[:-1], boundaries[1:]):
            scores, hidden = self.gru(
                seq[begin:end],
                hidden * step_masks[begin].view(1, -1, 1))
            segments.append(scores)

        # Concatenate to (T, N, -1), then flatten back to (T * N, -1).
        flat_out = torch.cat(segments, dim=0).view(num_steps * num_envs, -1)
        return flat_out, hidden.squeeze(0)
