import torch
from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import add_remaining_self_loops

from torch_geometric.nn.inits import glorot, zeros,uniform,ones
from torch_scatter import scatter, segment_csr
import torch.nn.functional as F


class GCNConv(MessagePassing):
    r"""Experimental message-passing layer derived from the graph
    convolutional operator of the `"Semi-supervised Classification with Graph
    Convolutional Networks" <https://arxiv.org/abs/1609.02907>`_ paper.

    NOTE: this class no longer computes the reference operator
    :math:`\mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2}
    \mathbf{X} \mathbf{\Theta}`. In its current state :meth:`forward` applies
    :obj:`num_iters` rounds of propagation in which every edge contributes

    .. math::
        \mathbf{m}_{ij} = \frac{\mathbf{x}_i \odot \mathbf{x}_j}
        {D_{ij} \, \boldsymbol{\kappa}}, \qquad
        \boldsymbol{\kappa} = \sum_{v} \mathbf{x}_v, \qquad
        D_{ij} = \sum_{f} \frac{(\mathbf{x}_i \odot \mathbf{x}_j)_f}
        {\kappa_f},

    and the messages are summed per node. :math:`\mathbf{x}_i` and
    :math:`\mathbf{x}_j` are the per-edge endpoint features handed to
    :meth:`message` by :class:`MessagePassing`.

    The following members are created but are not used by :meth:`forward`,
    :meth:`message`, :meth:`aggregate` or :meth:`update` as written:
    :obj:`weight`, :obj:`weight2`, :obj:`bias`, :obj:`bias_a`, :obj:`bias_b`,
    :obj:`alpha`, and the normalization coefficients returned by :meth:`norm`
    (they are computed and cached, then ignored by :meth:`message`).
    Consequently the output keeps the feature size of the input
    (presumably :obj:`in_channels`, not :obj:`out_channels`).

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample. Only used to size the
            (currently unused) parameters and in :meth:`__repr__`.
        improved (bool, optional): Forwarded to :meth:`norm`. It has no
            effect at the moment because self-loop insertion is disabled.
            (default: :obj:`False`)
        cached (bool, optional): If set to :obj:`True`, the layer caches the
            :obj:`(edge_index, norm)` pair on first execution and reuses it
            afterwards; a later call with a different number of edges raises
            :class:`RuntimeError`. (default: :obj:`False`)
        bias (bool, optional): If set to :obj:`False`, no :obj:`bias`
            parameter is registered. The bias is not added to the output in
            the current implementation. (default: :obj:`True`)
        normalize (bool, optional): Whether to compute symmetric
            normalization coefficients via :meth:`norm`. Self-loops are
            *not* added. (default: :obj:`True`)
        num_iters (int, optional): Number of propagation rounds performed by
            :meth:`forward`. The default reproduces the previously hard-coded
            ``range(1, 10000)`` loop. (default: :obj:`9999`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.

    Raises:
        ValueError: If :obj:`num_iters` is negative.
    """

    def __init__(self, in_channels, out_channels, improved=False, cached=False,
                 bias=True, normalize=True, num_iters=9999, **kwargs):
        super(GCNConv, self).__init__(aggr='add', **kwargs)

        if num_iters < 0:
            raise ValueError(
                'num_iters must be non-negative, got {}'.format(num_iters))

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.normalize = normalize
        self.num_iters = num_iters
        # Mixing coefficient kept for the (currently disabled) residual
        # update in ``forward``; not read anywhere in this class.
        self.alpha = 0.3

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))
        self.weight2 = Parameter(torch.Tensor(2 * out_channels, out_channels))

        self.bias_b = Parameter(torch.Tensor(out_channels))
        self.bias_a = Parameter(torch.Tensor(out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialize all parameters and drop the normalization cache."""
        glorot(self.weight)
        glorot(self.weight2)
        # ``self.bias`` may be ``None`` (bias=False); the ``zeros`` helper is
        # relied upon to tolerate that, exactly as before.
        zeros(self.bias)
        zeros(self.bias_a)
        # ``bias_b`` used to be left as uninitialized memory, which could put
        # arbitrary values (including NaN/inf) into the state dict.
        zeros(self.bias_b)
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight=None, improved=False,
             dtype=None):
        """Return ``edge_index`` and symmetric normalization coefficients.

        For every edge ``(row, col)`` the coefficient is
        ``deg[row]**-0.5 * edge_weight * deg[col]**-0.5`` where ``deg`` is the
        sum of edge weights grouped by ``row``.

        Args:
            edge_index: Tensor unpacked as ``row, col = edge_index``.
            num_nodes (int): Size of the degree vector.
            edge_weight (optional): Per-edge weights; all ones when ``None``.
            improved (bool, optional): Accepted for interface compatibility.
                It only selected the self-loop fill value, and self-loop
                insertion is disabled, so it is ignored here.
            dtype (optional): dtype of the default edge weights.

        Returns:
            Tuple ``(edge_index, coefficients)``; ``edge_index`` is returned
            unchanged.
        """
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)

        # NOTE: ``add_remaining_self_loops`` is intentionally not applied
        # here, so the degrees are those of the graph as given.
        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        # Zero-degree nodes yield inf after the negative power; zero them so
        # they do not poison the product below.
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        """Run ``self.num_iters`` rounds of propagation over the graph.

        Args:
            x: Node feature tensor; its size along ``self.node_dim`` is taken
                as the number of nodes.
            edge_index: Edge tensor whose second dimension is the number of
                edges.
            edge_weight (optional): Per-edge weights passed to :meth:`norm`
                (or used directly as ``norm`` when ``normalize`` is False).

        Returns:
            The node features after the final propagation round (``x`` itself
            when ``num_iters`` is 0).

        Raises:
            RuntimeError: If ``cached`` is set and the number of edges
                differs from the one seen when the cache was filled.
        """
        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}. Please '
                    'disable the caching behavior of this layer by removing '
                    'the `cached=True` argument in its constructor.'.format(
                        self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            if self.normalize:
                edge_index, norm = self.norm(edge_index, x.size(
                    self.node_dim), edge_weight, self.improved, x.dtype)
            else:
                norm = edge_weight
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        # Each round feeds the current features both as ``x`` (gathered per
        # edge into x_i / x_j) and as ``old_x`` (passed through whole).
        for _ in range(self.num_iters):
            x = self.propagate(edge_index, x=x, norm=norm, old_x=x)

        return x

    def message(self, x_j, x_i, norm, old_x):
        """Build the per-edge message from both endpoint features.

        Args:
            x_j: Per-edge features of one endpoint (2-D; presumably the
                source node under the default flow).
            x_i: Per-edge features of the other endpoint, same shape as
                ``x_j``.
            norm: Normalization coefficients from :meth:`forward`; accepted
                but not used.
            old_x: The full node feature matrix entering this round.

        Returns:
            ``(x_i * x_j) / (D * kappa)`` where ``kappa`` is the sum of
            ``old_x`` over dim 0 and ``D`` is the per-edge sum over features
            of ``(x_i * x_j) / kappa``.
        """
        # NOTE(review): a zero entry in ``kappa`` or ``D`` produces inf/NaN;
        # no guard is applied, matching the original arithmetic exactly.
        pair = x_i * x_j
        kappa = torch.sum(old_x, dim=0)
        D = torch.sum(pair / kappa, dim=1)

        kappa = kappa.view(-1, x_i.shape[1])
        D = D.view(-1, 1)

        return pair / (D * kappa)

    def aggregate(self, inputs, index, ptr=None, dim_size=None):
        """Sum the messages in ``inputs`` grouped by ``index``.

        ``ptr`` is accepted for interface compatibility and is not used.
        """
        return scatter(inputs, index, dim=self.node_dim,
                       dim_size=dim_size, reduce='add')

    def update(self, aggr_out):
        """Return the aggregated messages unchanged (no bias is added)."""
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
