import torch
from torch_geometric.nn import MessagePassing
# from torch_geometric.nn import GCNConv
# torch_geometric.nn.
import torch.nn.functional as F
# from torchdiffeq import odeint_adjoint as odeint
from torchdiffeq import odeint as odeint
import torch.nn as nn
from torch_geometric.nn import GCNConv, Linear


class GFUNK_layer(nn.Module):
    """Graph Laplacian (spectral) layer.

    It does a graph Fourier transform, a reduction transform (per-mode
    scaling by a learned weight), and an inverse graph Fourier transform.

    Args:
        in_channels: Input feature width; only used to scale the initial weights.
        out_channels: Output feature width; only used to scale the initial weights.
        modes: Number of spectral modes, i.e. the length of the learned
            weight vector. Must equal the number of rows of ``redfor``
            passed to ``forward``.
    """

    def __init__(self, in_channels, out_channels, modes):
        # The original referenced an undefined class name in super(...),
        # which raised NameError on construction.
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.modes = modes

        # Uniform [0, 1) initial weights shrunk by 1 / (in * out).
        self.scale = (1 / (in_channels * out_channels))
        self.weights1 = nn.Parameter(self.scale * torch.rand(modes, dtype=torch.float))

    def addW(self, input, weights):
        """Return the matrix product ``input @ weights``."""
        return torch.matmul(input, weights)

    def forward(self, x, redfor, redvals):  # , fulinvy):
        """Scale the signal per mode in the basis given by ``redfor``.

        Args:
            x: Node features. Input should be a batch size of 1 and is
                number of nodes in graph x width.
            redfor: Forward transform matrix of shape (modes, nodes); its
                transpose is used for the inverse transform. Presumably the
                leading graph Laplacian eigenvectors - confirm with caller.
            redvals: Unused here; kept for interface compatibility.

        Returns:
            Tensor computed as ``redfor^T @ diag(weights1) @ redfor @ x``.
        """
        # Forward transform: project node features onto the reduced basis.
        x_gft = torch.matmul(redfor, x)
        # Reduction transform: one learned scalar per mode.
        x_gft = torch.matmul(torch.diag(self.weights1), x_gft)
        # Inverse transform back to node space.
        x_return = torch.matmul(redfor.transpose(0, 1), x_gft)
        return x_return



class GFUNK(torch.nn.Module):
    """Network stacking three spectral layers with pointwise linear skips.

    The input is lifted from 1 channel to ``width`` channels, passed through
    three blocks of ``gelu(GFUNK_layer(x) + Linear(x))``, and projected back
    to 1 channel through a 32-unit hidden layer with a sigmoid activation.

    Args:
        modes: Number of spectral modes given to each ``GFUNK_layer``.
        width: Number of hidden channels.
    """

    def __init__(self, modes, width):
        # The original referenced an undefined class name in super(...),
        # which raised NameError on construction.
        super().__init__()

        self.modes1 = modes
        self.width = width
        # Lifting layer: expects the last input dimension to be 1.
        self.fco = nn.Linear(1, self.width)

        self.conv0 = GFUNK_layer(self.width, self.width, self.modes1)
        self.w0 = Linear(self.width, self.width)
        self.conv1 = GFUNK_layer(self.width, self.width, self.modes1)
        self.w1 = Linear(self.width, self.width)
        self.conv2 = GFUNK_layer(self.width, self.width, self.modes1)
        self.w2 = Linear(self.width, self.width)

        # Projection head back to a single output channel.
        self.fc1 = Linear(self.width, 32)
        self.fcOut = Linear(32, 1)

    def forward(self, u, pos, diffusion, redfor, redvals, fibers): #, fulinvx, fulinvy):
        """Apply the network to the signal ``u``.

        Args:
            u: Input signal whose last dimension is 1.
            pos: Unused here; kept for interface compatibility.
            diffusion: Unused here; kept for interface compatibility.
            redfor: Transform matrix forwarded to every spectral layer.
            redvals: Forwarded to every spectral layer.
            fibers: Unused here; kept for interface compatibility.

        Returns:
            Tensor whose last dimension is 1.
        """
        inSig = self.fco(u)

        # Each block sums the spectral path and the pointwise linear path.
        x1 = self.conv0(inSig, redfor, redvals)
        x2 = self.w0(inSig)
        x = F.gelu(x1 + x2)

        x1 = self.conv1(x, redfor, redvals)
        x2 = self.w1(x)
        x = F.gelu(x1 + x2)

        x1 = self.conv2(x, redfor, redvals)
        x2 = self.w2(x)
        x = F.gelu(x1 + x2)

        outSig = self.fc1(x)
        outSig = torch.sigmoid(outSig)
        outSig = self.fcOut(outSig)

        return outSig


class ODEfunc(nn.Module):
    """Adapter that exposes ``model`` through a ``(t, u)`` call signature.

    The time argument is ignored; the wrapped model is called as
    ``model(u, **params)``.
    """

    def __init__(self, model, params=None):
        super(ODEfunc, self).__init__()
        self.model = model
        # A caller-supplied dict is stored as-is (not copied); only a
        # missing one is replaced by a fresh empty dict.
        self.params = {} if params is None else params

    def forward(self, t, u):
        """Evaluate the wrapped model on ``u``; ``t`` is not used."""
        extra_kwargs = self.params
        return self.model(u, **extra_kwargs)

    def update_params(self, params):
        """Merge ``params`` into the stored keyword arguments in place."""
        stored = self.params
        stored.update(params)


class ODEBlock(nn.Module):
    """Module that integrates ``odefunc`` with ``odeint``.

    Args:
        odefunc: Callable evaluated by the solver as ``odefunc(t, x)``.
        method: Solver name passed to ``odeint`` as ``method``.
        rtol: Relative tolerance passed to ``odeint``.
        atol: Absolute tolerance passed to ``odeint``.
        step_size: Value forwarded to the solver as ``options['step_size']``.
            Defaults to the previously hard-coded 0.00025.
    """

    def __init__(self, odefunc, method, rtol=1e-7, atol=1e-9, step_size=.00025):
        super(ODEBlock, self).__init__()
        self.odefunc = odefunc
        self.rtol = rtol
        self.atol = atol
        self.method = method
        self.step_size = step_size

    def forward(self, x, t):
        """Integrate from initial state ``x`` over the time points ``t``.

        Returns:
            Whatever ``odeint`` returns for the given state and time points.
        """
        options = {
            'dtype': torch.float64,
            'step_size': self.step_size
            # 'first_step': 1.0e-9,
            # 'grid_points': t,
        }

        # NOTE: the plain (non-adjoint) odeint is used, so no adjoint_options
        # are built. If switching to odeint_adjoint, the previous draft used
        # adjoint_options={'norm': "seminorm"}.
        out = odeint(
            self.odefunc,
            x,
            t,
            method=self.method,
            rtol=self.rtol,
            atol=self.atol,
            options=options,
        )
        return out


# TODO: Boundary conditions are assumed to be the same on the entire boundary. This can be relaxed.
def get_full_model(modes, width,
        bd_conditions,
        withD,
        msg_nodes,
        msg_dim,
        aggr_nodes,
        int_method='adaptive_heun',
        int_rtol=0.0,
        int_atol=1e-5,
        device='cuda'
):
    """
    Builds and returns the full ODE model: a GFUNK network wrapped in an
    ODEfunc, wrapped in an ODEBlock, both moved to ``device``.

    Args:
        modes: number of spectral modes passed to GFUNK
        width: hidden channel width passed to GFUNK
        bd_conditions: 'none', 'neumann', or 'dirichlet'. Same for entire
            boundary. Not read by this function.
        withD: not read by this function.
        msg_nodes: number of nodes for the message network. Not read by
            this function.
        msg_dim: dimension of message network output. Not read by this
            function.
        aggr_nodes: number of nodes for the aggregation dense neural
            network. Not read by this function.
        int_method: method used for the ODE integrator
        int_rtol: relative tolerance used for the ODE integrator
        int_atol: absolute tolerance used for the ODE integrator
        device: cpu, gpu, etc

    Returns: model

    """
    network = GFUNK(modes=modes, width=width)
    rhs = ODEfunc(network).to(device)
    integrator = ODEBlock(rhs, int_method, rtol=int_rtol, atol=int_atol)
    return integrator.to(device)
