import torch
import torch.nn as nn
from layers.Graph import GraphConv, mixprop
from layers.RevIn import RevIN
from layers.GRformer_layer import TruncateModule

class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series.

    The series is edge-padded along dim 1 by repeating its first and last
    time step so that, with ``stride=1``, the averaged output has the same
    length as the input for both odd and even ``kernel_size``.

    Args:
        kernel_size: width of the averaging window.
        stride: stride passed to ``nn.AvgPool1d``.
    """
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """
        Args:
            x: tensor indexed as [batch, time, channel] (dim 1 is averaged).

        Returns:
            Tensor with the same layout holding the windowed mean.
        """
        # A window of size k needs k - 1 padded steps in total to keep the
        # length. For odd k this splits evenly; for even k the extra step
        # goes to the front (symmetric padding would lose one time step).
        total_pad = self.kernel_size - 1
        end_pad = total_pad // 2
        front_pad = total_pad - end_pad
        front = x[:, 0:1, :].repeat(1, front_pad, 1)
        end = x[:, -1:, :].repeat(1, end_pad, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools over the last dim, so move time there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class series_decomp(nn.Module):
    """
    Series decomposition block.

    Splits a series into a smooth component (stride-1 moving average) and
    the remainder left after subtracting that component from the input.
    """
    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(residual, moving_mean)`` for the input series ``x``."""
        trend = self.moving_avg(x)
        return x - trend, trend

class DLinear(nn.Module):
    """
    Decomposition-Linear

    The input is split into a seasonal and a trend part; each part is mapped
    from ``seq_len`` to ``pred_len`` steps by its own linear layer and the two
    forecasts are summed. With ``configs.individual`` every channel gets its
    own pair of linear layers, otherwise one pair is shared by all channels.
    """
    def __init__(self, configs):
        super().__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

        # Optional RevIN layer wrapped around the whole model
        self.revin = configs.revin
        if self.revin:
            self.revin_layer = RevIN(configs.enc_in, affine=configs.affine, subtract_last=False)

        # Moving-average window used for the decomposition
        window = 25
        self.decompsition = series_decomp(window)
        self.individual = configs.individual
        self.channels = configs.enc_in

        if not self.individual:
            # One seasonal and one trend projection shared by all channels
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
        else:
            # One seasonal/trend projection pair per channel; layers are
            # created pairwise so the initialisation order stays seasonal,
            # trend, seasonal, trend, ...
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            for _ in range(self.channels):
                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        if self.revin:
            x = self.revin_layer(x, 'norm')

        seasonal, trend = self.decompsition(x)
        # Put time on the last axis so nn.Linear acts along it
        seasonal = seasonal.permute(0, 2, 1)
        trend = trend.permute(0, 2, 1)

        if self.individual:
            # Output buffers share dtype and device with their inputs
            seasonal_out = seasonal.new_zeros((seasonal.size(0), seasonal.size(1), self.pred_len))
            trend_out = trend.new_zeros((trend.size(0), trend.size(1), self.pred_len))
            for ch in range(self.channels):
                seasonal_out[:, ch, :] = self.Linear_Seasonal[ch](seasonal[:, ch, :])
                trend_out[:, ch, :] = self.Linear_Trend[ch](trend[:, ch, :])
        else:
            seasonal_out = self.Linear_Seasonal(seasonal)
            trend_out = self.Linear_Trend(trend)

        # Back to [Batch, Output length, Channel]
        forecast = (seasonal_out + trend_out).permute(0, 2, 1)
        if self.revin:
            forecast = self.revin_layer(forecast, 'denorm')
        return forecast


class NLinear(nn.Module):
    """
    Normalization-Linear

    Subtracts the last observed value of every channel from the input,
    applies a single linear map along the time axis, and adds the value back.
    """
    def __init__(self, configs):
        super().__init__()
        self.seq_len, self.pred_len = configs.seq_len, configs.pred_len
        self.Linear = nn.Linear(self.seq_len, self.pred_len)
        # Use this line if you want to visualize the weights
        # self.Linear.weight = nn.Parameter((1/self.seq_len)*torch.ones([self.pred_len,self.seq_len]))

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        # The last step is detached, so no gradient flows through the offset.
        anchor = x[:, -1:, :].detach()
        centered = (x - anchor).permute(0, 2, 1)
        projected = self.Linear(centered).permute(0, 2, 1)
        return projected + anchor  # [Batch, Output length, Channel]


class GLinear(nn.Module):
    """
    Patch-based decomposition linear forecaster with an optional graph step.

    Pipeline of ``forward``: optional RevIN normalisation, seasonal/trend
    decomposition, patching along the time axis, per-patch linear embedding
    to ``d_model``, then either the two ``mixprop`` blocks with a residual
    and batch norm (``use_gcn``) or a linear layer across patches, and
    finally a linear projection to ``pred_len`` steps.

    Args:
        args: configuration object. Always read: ``seq_len``, ``pred_len``,
            ``enc_in``, ``revin``, ``use_gcn``, ``patch_len``, ``stride``,
            ``d_model``, ``dropout``. Read conditionally: ``affine`` (RevIN),
            ``padding_patch`` (when ``seq_len`` is not a multiple of the
            stride), and ``d_node``, ``subgraph_size``, ``tanh_alpha``,
            ``device``, ``gcn_depth``, ``prop_alpha`` (GCN).
        corr: forwarded unchanged to ``GraphConv`` when ``use_gcn`` is set.
        high_correlated_count: forwarded unchanged to ``GraphConv`` when
            ``use_gcn`` is set.
    """
    def __init__(self, args, corr=None, high_correlated_count=None) -> None:
        super(GLinear, self).__init__()
        self.seq_len = args.seq_len
        self.pred_len = args.pred_len
        self.channels = args.enc_in
        self.revin = args.revin
        self.use_gcn = args.use_gcn
        if self.revin:
            self.revin_layer = RevIN(args.enc_in, affine=args.affine, subtract_last=False)
        # GCN
        if self.use_gcn:
            self.gc = GraphConv(corr=corr, high_correlated_count=high_correlated_count,
                                node_num=self.channels, d_node=args.d_node, top_k=args.subgraph_size, tanh_alpha=args.tanh_alpha, device=args.device)
            # One index per channel; moved to the input's device in forward().
            self.id_list = torch.arange(args.enc_in)

            self.gconv1 = mixprop(self.channels, self.channels, args.gcn_depth, args.dropout, args.prop_alpha, args.d_model)
            self.gconv2 = mixprop(self.channels, self.channels, args.gcn_depth, args.dropout, args.prop_alpha, args.d_model)

        # Patch
        self.patch_len = args.patch_len
        # "half" gives 50%-overlapping patches; anything else gives
        # non-overlapping patches.
        self.stride = self.patch_len//2 if args.stride=="half" else self.patch_len

        remainder = self.seq_len % self.stride
        if remainder == 0:
            # Evenly divisible, so no padding or truncation is needed:
            # the input is passed through unchanged.
            self.patch_num = (self.seq_len - self.patch_len)//self.stride + 1
            self.process_layer = nn.Identity()
        elif args.padding_patch == "end":
            # The length falls short of a stride multiple: replicate the
            # last value at the tail up to the next multiple.
            padding_length = self.stride - remainder
            padded_len = self.seq_len + padding_length
            # Count the patches on the padded length, exactly as unfold()
            # will, so the layer sizes below match for any patch_len
            # (including patch_len values that are not a stride multiple).
            self.patch_num = (padded_len - self.patch_len)//self.stride + 1
            self.process_layer = nn.ReplicationPad1d((0, padding_length))
        else:
            # Without padding, the strategy is to truncate instead.
            truncated_length = self.seq_len - remainder
            # NOTE(review): this count is based on seq_len, not on
            # truncated_length. The two agree when patch_len is a multiple
            # of the stride; TruncateModule is defined elsewhere, so confirm
            # its output length before changing this formula.
            self.patch_num = int((self.seq_len - self.patch_len)/self.stride + 1)
            self.process_layer = TruncateModule(truncated_length)

        # Decomposition kernel size
        kernel_size = 25
        self.decompsition = series_decomp(kernel_size)

        # Per component: [0] patch embedding, [1] mixing across patches
        # (used only when GCN is disabled), [2] projection to the horizon.
        self.Linear_Seasonal = nn.ModuleList([
            nn.Linear(self.patch_len, args.d_model),
            nn.Linear(self.patch_num, self.patch_num),
            nn.Linear(self.patch_num*args.d_model, self.pred_len)
        ])
        self.Linear_Trend = nn.ModuleList([
            nn.Linear(self.patch_len, args.d_model),
            nn.Linear(self.patch_num, self.patch_num),
            nn.Linear(self.patch_num*args.d_model, self.pred_len)
        ])
        self.drop_Seasonal = nn.Dropout(args.dropout)
        self.drop_Trend = nn.Dropout(args.dropout)
        # BatchNorm1d normalises dim 1, so d_model is swapped there and back.
        self.norm_attn_s = nn.Sequential(Transpose(1,2), nn.BatchNorm1d(args.d_model), Transpose(1,2))
        self.norm_attn_t = nn.Sequential(Transpose(1,2), nn.BatchNorm1d(args.d_model), Transpose(1,2))

    def forward(self, x):
        """
        Args:
            x: input series, [batch_size x seq_len x channel].

        Returns:
            ``(x, A0, A)`` when ``use_gcn`` is set, where ``A`` and ``A0``
            are the two values returned by ``self.gc``; otherwise only the
            forecast ``x`` of shape [batch_size x pred_len x channel].
        """
        if self.use_gcn:
            A, A0 = self.gc(self.id_list.to(x.device))
        else:
            A, A0 = None, None
        if self.revin:
            x = self.revin_layer(x, 'norm')
        seasonal_init, trend_init = self.decompsition(x)
        seasonal_init, trend_init = seasonal_init.permute(0,2,1), trend_init.permute(0,2,1)

        # do patching
        pad_result_s = self.process_layer(seasonal_init)
        pad_result_t = self.process_layer(trend_init)
        # [batch_size x channel x seq_len] -> [batch_size x channel x patch_num x patch_len]
        pad_result_s = pad_result_s.unfold(dimension=-1, size=self.patch_len, step=self.stride)
        pad_result_t = pad_result_t.unfold(dimension=-1, size=self.patch_len, step=self.stride)
        # [batch_size x channel x patch_num x patch_len] -> [batch_size x channel x patch_num x dim_model]
        pad_result_s = self.Linear_Seasonal[0](pad_result_s)
        pad_result_t = self.Linear_Trend[0](pad_result_t)
        shape = pad_result_s.shape
        # [batch_size x channel x patch_num x dim_model] -> [(batch_size * channel) x patch_num x dim_model]
        pad_result_s = torch.reshape(pad_result_s, (shape[0]*shape[1], shape[2], shape[3]))
        pad_result_t = torch.reshape(pad_result_t, (shape[0]*shape[1], shape[2], shape[3]))

        if self.use_gcn:
            # NOTE(review): with GCN enabled the patch-mixing layers
            # (Linear_*[1]) are not applied; the graph blocks operate on the
            # embedded patches directly. Confirm this is intended.
            # [(batch_size * channel) x patch_num x dim_model] -> [batch_size x channel x (patch_num * dim_model)]
            result_s = torch.reshape(pad_result_s, (shape[0], shape[1], -1))
            result_t = torch.reshape(pad_result_t, (shape[0], shape[1], -1))

            # The same two blocks serve both components: gconv1 receives A,
            # gconv2 receives its transpose.
            seasonal_output = self.gconv1(result_s, A) + self.gconv2(result_s, A.transpose(1, 0))
            trend_output    = self.gconv1(result_t, A) + self.gconv2(result_t, A.transpose(1, 0))

            seasonal_output = torch.reshape(seasonal_output, (-1, shape[2], shape[3]))
            trend_output = torch.reshape(trend_output, (-1, shape[2], shape[3]))

            seasonal_output = pad_result_s + self.drop_Seasonal(seasonal_output)          # residual
            seasonal_output = self.norm_attn_s(seasonal_output)
            trend_output = pad_result_t + self.drop_Trend(trend_output)          # residual
            trend_output = self.norm_attn_t(trend_output)
        else:
            # Mix information across patches: the linear layer acts on the
            # patch axis, hence the permute there and back. No residual,
            # dropout or normalisation is applied on this path.
            seasonal_output = self.Linear_Seasonal[1](pad_result_s.permute(0,2,1)).permute(0,2,1)
            trend_output = self.Linear_Trend[1](pad_result_t.permute(0,2,1)).permute(0,2,1)

        # [(batch_size * channel) x patch_num x dim_model] -> [batch_size x channel x (patch_num * dim_model)]
        seasonal_output = torch.reshape(seasonal_output, (shape[0], shape[1], -1))
        trend_output = torch.reshape(trend_output, (shape[0], shape[1], -1))

        # [batch_size x channel x (patch_num * dim_model)] -> [batch_size x channel x pred_len]
        seasonal_output = self.Linear_Seasonal[2](seasonal_output)
        trend_output = self.Linear_Trend[2](trend_output)

        x = seasonal_output + trend_output
        x = x.permute(0,2,1)
        if self.revin:
            x = self.revin_layer(x, 'denorm')
        if self.use_gcn:
            return x, A0, A
        else:
            return x
    

class Transpose(nn.Module):
    """
    Module form of ``Tensor.transpose`` so it can be placed in ``nn.Sequential``.

    Args:
        *dims: the dimensions handed to ``Tensor.transpose``.
        contiguous: when true, ``.contiguous()`` is called on the result.
    """
    def __init__(self, *dims, contiguous=False):
        super().__init__()
        self.dims = dims
        self.contiguous = contiguous

    def forward(self, x):
        swapped = x.transpose(*self.dims)
        return swapped.contiguous() if self.contiguous else swapped