import torch
import torch.nn as nn

from dcrl.utils.model_utils import layer_init


class ObsStateCriticModel(nn.Module):
    """Critic that maps an ``{"obs", "state"}`` input to a single value estimate.

    The ``"obs"`` and ``"state"`` entries are each encoded by their own
    representation network. When ``recurrent`` is true, an ``nn.LSTMCell`` is
    run on the obs embedding only, and its new hidden state is concatenated in
    front of the obs embedding. The concatenated embedding then goes through an
    MLP (``value_net``) and a final linear layer (``value_head``) with one
    output unit.
    """

    def __init__(
        self,
        obs_space,
        recurrent: bool = True,
        hidden_size_list=None,
        rnn_hidden_size=None,
        get_representation_net_func=None,
    ):
        """Build the representation nets, the optional LSTM cell and the value MLP.

        Args:
            obs_space: Indexable with the keys ``"obs"`` and ``"state"``; each
                entry is handed unchanged to ``get_representation_net_func``.
            recurrent: Whether to add an LSTM cell over the obs embedding.
            hidden_size_list: Output widths of the hidden layers of the value
                MLP, in order. ``None`` (the default) or an empty sequence
                means no hidden layers: the value head is applied directly to
                the concatenated embedding.
            rnn_hidden_size: Hidden size of the LSTM cell. Required when
                ``recurrent`` is true.
            get_representation_net_func: Callable taking one sub-space and
                returning ``(net, tensor_type, rep_size)``. ``tensor_type`` is
                later passed to ``Tensor.type`` and ``rep_size`` is used as the
                feature width of the net's output.

        Raises:
            TypeError: If ``get_representation_net_func`` is ``None``, or if
                ``recurrent`` is true while ``rnn_hidden_size`` is ``None``.
        """
        super().__init__()

        # Fail early with a readable message; both cases previously surfaced
        # as an opaque TypeError from deeper in the constructor.
        if get_representation_net_func is None:
            raise TypeError("get_representation_net_func must be a callable, got None")
        if recurrent and rnn_hidden_size is None:
            raise TypeError("rnn_hidden_size must be set when recurrent=True")

        self.recurrent = recurrent
        # The signature default is None, so normalise it to "no hidden layers"
        # instead of crashing when the MLP is built.
        self.hidden_size_list = [] if hidden_size_list is None else hidden_size_list
        self.rnn_hidden_size = rnn_hidden_size

        # Define representation nets (one for "obs", one for "state").
        (
            self.obs_representation_net,
            self.obs_tensor_type,
            self.obs_rep_size,
        ) = get_representation_net_func(obs_space["obs"])
        (
            self.state_representation_net,
            self.state_tensor_type,
            self.state_rep_size,
        ) = get_representation_net_func(obs_space["state"])

        self.embedding_size = self.obs_rep_size + self.state_rep_size

        # Define memory: the LSTM cell only sees the obs embedding.
        if self.recurrent:
            self.memory_rnn = nn.LSTMCell(self.obs_rep_size, self.rnn_hidden_size)
            for name, param in self.memory_rnn.named_parameters():
                if "bias" in name:
                    nn.init.constant_(param, 0)
                elif "weight" in name:
                    nn.init.orthogonal_(param, 1.0)
            # forward() prepends the LSTM hidden state to the embedding.
            self.embedding_size += self.rnn_hidden_size

        # Define value net: Linear + ReLU per requested hidden width.
        layers = []
        input_size = self.embedding_size
        for hidden_size in self.hidden_size_list:
            layers.append(layer_init(nn.Linear(input_size, hidden_size)))
            layers.append(nn.ReLU())
            input_size = hidden_size
        self.value_net = nn.Sequential(*layers)

        self.value_head = layer_init(nn.Linear(input_size, 1), std=1.0)

    @property
    def memory_size(self) -> int:
        """Width of the ``memory`` tensor: LSTM hidden and cell state side by side.

        Returns ``2 * rnn_hidden_size``, or 0 when no ``rnn_hidden_size`` was
        configured.
        """
        if self.rnn_hidden_size is None:
            return 0
        return 2 * self.rnn_hidden_size

    @property
    def value_size(self) -> int:
        """Number of output units of the value head (always 1)."""
        return 1

    def forward(self, obs, memory=None):
        """Compute the value estimate and the updated recurrent memory.

        Args:
            obs: Indexable with the keys ``"obs"`` and ``"state"``, each a
                tensor. Presumably batch-first, i.e. ``(batch, ...)`` -- the
                recurrent path concatenates along dim 1 and builds its default
                memory from ``shape[0]`` of the obs embedding.
            memory: Tensor whose dim 1 holds the LSTM hidden state in the
                first ``rnn_hidden_size`` columns and the cell state in the
                rest. Only read when the model is recurrent; if ``None`` there,
                a zero state is used (the same default ``nn.LSTMCell`` applies
                when no state is given).

        Returns:
            Tuple ``(value, memory)``: ``value`` is the value head output and
            ``memory`` is the updated ``[hidden, cell]`` concatenation when
            recurrent, otherwise the ``memory`` argument unchanged.
        """
        x_obs = obs["obs"].type(self.obs_tensor_type)
        x_state = obs["state"].type(self.state_tensor_type)
        obs_embedding = self.obs_representation_net(x_obs)
        state_embedding = self.state_representation_net(x_state)

        if self.recurrent:
            if memory is None:
                # No previous state supplied: start from zeros on the same
                # device/dtype as the embedding fed to the LSTM cell.
                memory = obs_embedding.new_zeros(obs_embedding.shape[0], self.memory_size)
            hidden = (memory[:, : self.rnn_hidden_size], memory[:, self.rnn_hidden_size :])
            hidden = self.memory_rnn(obs_embedding, hidden)
            # Order matters: embedding_size was built as rnn + obs + state.
            obs_embedding = torch.cat([hidden[0], obs_embedding], dim=1)
            memory = torch.cat(hidden, dim=1)
        embedding = torch.cat((obs_embedding, state_embedding), dim=-1)

        output = self.value_net(embedding)
        value = self.value_head(output)

        return value, memory
