import torch
import torch.nn as nn
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.inits import glorot, zeros
from torch_geometric.utils import add_remaining_self_loops, is_undirected
from torch_scatter import scatter_add

from MegaGNN.graphgym.config import cfg


class GeneralConvLayerV2(MessagePassing):
    r"""General GNN layer
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 improved=False,
                 cached=False,
                 bias=True,
                 **kwargs):
        super(GeneralConvLayerV2, self).__init__(aggr=cfg.gnn.agg,
                                                 flow=cfg.gnn.flow,
                                                 **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.normalize = cfg.gnn.normalize_adj

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))
        if cfg.gnn.self_msg == 'concat':
            self.weight_self = Parameter(
                torch.Tensor(in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        glorot(self.weight)
        if cfg.gnn.self_msg == 'concat':
            glorot(self.weight_self)
        zeros(self.bias)
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index,
             num_nodes,
             edge_weight=None,
             improved=False,
             dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)

        fill_value = 1.0 if not improved else 2.0
        edge_index, edge_weight = add_remaining_self_loops(
            edge_index, edge_weight, fill_value, num_nodes)

        row, col = edge_index
        if is_undirected(edge_index, num_nodes=num_nodes):
            deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
            deg_inv_sqrt = deg.pow(-0.5)
            deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
            norm = deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
        else:
            if cfg.gnn.flow == 'source_to_target':
                deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
            else:
                deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
            deg_inv_sqrt = deg.pow(-1.0)
            deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
            norm = (deg_inv_sqrt[row] if cfg.gnn.flow == 'source_to_target'
                    else deg_inv_sqrt[col]) * edge_weight

        return edge_index, norm

    def forward(self, x, edge_index, edge_weight=None, edge_feature=None):
        """"""
        if cfg.gnn.self_msg == 'concat':
            x_self = torch.matmul(x, self.weight_self)
        x = torch.matmul(x, self.weight)

        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}. Please '
                    'disable the caching behavior of this layer by removing '
                    'the `cached=True` argument in its constructor.'.format(
                        self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            if self.normalize:
                edge_index, norm = self.norm(edge_index, x.size(self.node_dim),
                                             edge_weight, self.improved,
                                             x.dtype)
            else:
                if cfg.gnn.self_msg == 'none':
                    # add self-loop to ensure the final output
                    # has considered h_{v}^{l-1}
                    edge_index, edge_weight = add_remaining_self_loops(
                        edge_index, edge_weight, 2 if self.improved else 1,
                        x.size(self.node_dim))
                norm = edge_weight
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result
        x_msg = self.propagate(edge_index,
                               x=x,
                               norm=norm,
                               edge_feature=edge_feature)
        if cfg.gnn.self_msg == 'none':
            return x_msg
        elif cfg.gnn.self_msg == 'add':
            return x_msg + x
        elif cfg.gnn.self_msg == 'concat':
            return x_msg + x_self
        else:
            raise ValueError('self_msg {} not defined'.format(
                cfg.gnn.self_msg))

    def message(self, x_j, norm, edge_feature):
        if edge_feature is None:
            return norm.view(-1, 1) * x_j if norm is not None else x_j
        else:
            return norm.view(-1, 1) * (
                x_j + edge_feature) if norm is not None else (x_j +
                                                              edge_feature)

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)


class GeneralEdgeConvLayerV2(MessagePassing):
    r"""General GNN layer, with edge features
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 improved=False,
                 cached=False,
                 bias=True,
                 **kwargs):
        super(GeneralEdgeConvLayerV2, self).__init__(aggr=cfg.gnn.agg,
                                                     flow=cfg.gnn.flow,
                                                     **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.normalize = cfg.gnn.normalize_adj
        self.msg_direction = cfg.gnn.msg_direction

        if self.msg_direction == 'single':
            self.linear_msg = nn.Linear(in_channels + cfg.dataset.edge_dim,
                                        out_channels,
                                        bias=False)
        else:
            self.linear_msg = nn.Linear(in_channels * 2 + cfg.dataset.edge_dim,
                                        out_channels,
                                        bias=False)
        if cfg.gnn.self_msg == 'concat':
            self.linear_self = nn.Linear(in_channels, out_channels, bias=False)

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        zeros(self.bias)
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index,
             num_nodes,
             edge_weight=None,
             improved=False,
             dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)

        fill_value = 1.0 if not improved else 2.0
        edge_index, edge_weight = add_remaining_self_loops(
            edge_index, edge_weight, fill_value, num_nodes)

        row, col = edge_index
        if is_undirected(edge_index, num_nodes=num_nodes):
            deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
            deg_inv_sqrt = deg.pow(-0.5)
            deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
            norm = deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
        else:
            if cfg.gnn.flow == 'source_to_target':
                deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
            else:
                deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
            deg_inv_sqrt = deg.pow(-1.0)
            deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
            norm = (deg_inv_sqrt[row] if cfg.gnn.flow == 'source_to_target'
                    else deg_inv_sqrt[col]) * edge_weight

        return edge_index, norm

    def forward(self, x, edge_index, edge_weight=None, edge_feature=None):
        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}. Please '
                    'disable the caching behavior of this layer by removing '
                    'the `cached=True` argument in its constructor.'.format(
                        self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            if self.normalize:
                edge_index, norm = self.norm(edge_index, x.size(self.node_dim),
                                             edge_weight, self.improved,
                                             x.dtype)
            else:
                norm = edge_weight
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        x_msg = self.propagate(edge_index,
                               x=x,
                               norm=norm,
                               edge_feature=edge_feature)

        if cfg.gnn.self_msg == 'concat':
            x_self = self.linear_self(x)
            return x_self + x_msg
        elif cfg.gnn.self_msg == 'add':
            return x + x_msg
        else:
            return x_msg

    def message(self, x_i, x_j, norm, edge_feature):
        if self.msg_direction == 'both':
            x_j = torch.cat((x_i, x_j, edge_feature), dim=-1)
        else:
            x_j = torch.cat((x_j, edge_feature), dim=-1)
        x_j = self.linear_msg(x_j)
        return norm.view(-1, 1) * x_j if norm is not None else x_j

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
