import torch
import torch.nn as nn
from operations import *
from torch.autograd import Variable
from utils import drop_path
import torch.nn.functional as F

class ECALayer(nn.Module):
    """Channel attention that gates each channel with a 1-D conv over pooled descriptors.

    The input is globally average-pooled to one value per channel, the
    resulting channel vector is filtered by a single-channel ``Conv1d``
    running along the channel axis, squashed with a sigmoid and multiplied
    back onto the input.

    Args:
        channel: Number of input channels. Kept in the signature but not
            read by the layer.
        k_size: Kernel size of the 1-D convolution across channels.
    """

    def __init__(self, channel, k_size=3):
        super().__init__()
        half_kernel = (k_size - 1) // 2
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=half_kernel, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled per channel by the learned attention gate.

        Args:
            x: 4-D tensor unpacked as (batch, channels, height, width).

        Returns:
            Tensor with the same shape as ``x``.
        """
        batch, channels, _, _ = x.shape
        # (B, C, 1, 1) -> (B, 1, C) so the conv slides along the channel axis.
        descriptor = self.avg_pool(x).view(batch, 1, channels)
        gate = self.sigmoid(self.conv(descriptor))
        gate = gate.view(batch, channels, 1, 1)
        return x * gate.expand_as(x)


class SELayer(nn.Module):
    """Squeeze-and-excitation style channel gate.

    Global average pooling produces one value per channel; a two-layer
    bottleneck of 1x1 convolutions (with biases) maps it to per-channel
    logits, and the sigmoid of those logits rescales the input.

    Args:
        channel: Number of input (and output) channels.
        reduction: Divisor used to size the bottleneck; the hidden width is
            ``channel // reduction`` but never less than 1.
    """

    def __init__(self, channel: int, reduction: int = 16):
        super().__init__()
        bottleneck = max(1, channel // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        squeeze = nn.Conv2d(channel, bottleneck, kernel_size=1, bias=True)
        excite = nn.Conv2d(bottleneck, channel, kernel_size=1, bias=True)
        self.fc = nn.Sequential(squeeze, nn.ReLU(inplace=True), excite)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` multiplied by its (B, C, 1, 1) channel gate."""
        gate = self.sigmoid(self.fc(self.avg_pool(x)))
        return x * gate


class _CBAMChannel(nn.Module):
    """Channel-attention branch: shared bottleneck MLP over avg- and max-pooled descriptors.

    Args:
        channel: Number of input channels.
        reduction: Divisor for the bottleneck width (``channel // reduction``,
            at least 1).
    """

    def __init__(self, channel: int, reduction: int = 16):
        super().__init__()
        bottleneck = max(1, channel // reduction)
        # One bias-free MLP, applied to both pooled descriptors.
        self.mlp = nn.Sequential(
            nn.Conv2d(channel, bottleneck, kernel_size=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(bottleneck, channel, kernel_size=1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the (B, C, 1, 1) channel gate; the input is not rescaled here."""
        from_avg = self.mlp(self.avg_pool(x))
        from_max = self.mlp(self.max_pool(x))
        return self.sigmoid(from_avg + from_max)

class _CBAMSpatial(nn.Module):
    """Spatial-attention branch: conv over the channel-wise mean and max maps.

    Args:
        kernel_size: Size of the square convolution kernel; must be 3 or 7.
    """

    def __init__(self, kernel_size: int = 7):
        super().__init__()
        assert kernel_size in (3, 7)
        # "Same" padding so the attention map keeps the input's H x W.
        self.conv = nn.Conv2d(
            2, 1, kernel_size, padding=(kernel_size - 1) // 2, bias=False
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the (B, 1, H, W) spatial gate; the input is not rescaled here."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values
        descriptor = torch.cat((channel_mean, channel_max), dim=1)
        return self.sigmoid(self.conv(descriptor))

class CBAM(nn.Module):
    """Sequential channel-then-spatial attention.

    Args:
        channel: Number of input channels.
        reduction: Bottleneck divisor forwarded to the channel branch.
        spatial_kernel: Kernel size forwarded to the spatial branch.
    """

    def __init__(self, channel: int, reduction: int = 16, spatial_kernel: int = 7):
        super().__init__()
        self.ca = _CBAMChannel(channel, reduction=reduction)
        self.sa = _CBAMSpatial(kernel_size=spatial_kernel)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the channel gate, then the spatial gate computed on the gated result."""
        channel_refined = x * self.ca(x)
        # The spatial gate is derived from the already channel-refined features.
        return channel_refined * self.sa(channel_refined)


class _ECAMChannelECA(nn.Module):
    """ECA-style channel gate that returns the gate itself rather than the gated input.

    Args:
        channel: Number of input channels. Kept in the signature but not
            read by the module.
        k_size: Kernel size of the 1-D convolution across channels.
    """

    def __init__(self, channel: int, k_size: int = 3):
        super().__init__()
        half_kernel = (k_size - 1) // 2
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=half_kernel, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the (B, C, 1, 1) channel gate for a 4-D input ``x``."""
        batch, channels, _, _ = x.shape
        # Treat the pooled channels as a length-C sequence: (B, 1, C).
        descriptor = self.avg_pool(x).view(batch, 1, channels)
        gate = self.sigmoid(self.conv(descriptor))
        return gate.view(batch, channels, 1, 1)

class _ECAMSpatialLite(nn.Module):
    """Lightweight spatial gate: one conv over the average of the mean and max maps.

    Args:
        kernel_size: Size of the square convolution kernel.
    """

    def __init__(self, kernel_size: int = 5):
        super().__init__()
        self.conv = nn.Conv2d(
            1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2, bias=False
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the (B, 1, H', W') spatial gate produced by the conv and sigmoid."""
        channel_mean = x.mean(dim=1, keepdim=True)            # (B,1,H,W)
        channel_max = x.max(dim=1, keepdim=True).values       # (B,1,H,W)
        # Collapse both statistics into a single-channel hint map.
        hint = (channel_mean + channel_max) * 0.5
        return self.sigmoid(self.conv(hint))

class ECAM(nn.Module):
    """ECA channel gate followed by the lite spatial gate.

    Args:
        channel: Number of input channels (forwarded to the channel branch).
        k_c: Kernel size of the channel branch's 1-D convolution.
        k_s: Kernel size of the spatial branch's 2-D convolution.
    """

    def __init__(self, channel: int, k_c: int = 3, k_s: int = 5):
        super().__init__()
        self.eca = _ECAMChannelECA(channel, k_size=k_c)
        self.spa = _ECAMSpatialLite(kernel_size=k_s)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the channel gate, then the spatial gate computed on the gated result."""
        channel_refined = x * self.eca(x)                       # gate: (B,C,1,1)
        # The spatial gate is derived from the already channel-refined features.
        return channel_refined * self.spa(channel_refined)      # gate: (B,1,H,W)


class DualBranchCell(nn.Module):
    """Run a "robust" and a "normal" ``Cell`` on the same inputs and fuse their outputs.

    Both cells are built from the same genotype and channel settings; they
    differ only in the final ``normal`` flag handed to ``Cell``. The last
    feature of each cell is concatenated along the channel axis and passed
    through ReLU -> 1x1 conv -> BatchNorm -> ECA.

    Args:
        genotype: Architecture description forwarded to both cells.
        multiplier: Output width factor; the fused feature has
            ``multiplier * C`` channels and ``self.multiplier`` reports it.
        C_prev_prev: Channels of the feature two cells back.
        C_prev: Channels of the previous cell's feature.
        C: Base channel count of this cell.
        reduction: Forwarded to both cells and exposed as ``self.reduction``.
        reduction_prev: Forwarded to both cells.
    """

    def __init__(self, genotype, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
        super(DualBranchCell, self).__init__()
        # Report what this cell actually does instead of fixed constants, so
        # callers that size later layers from `cell.multiplier` stay correct
        # for any `multiplier`.
        self.reduction = reduction
        self.multiplier = multiplier
        self.cell_robust = Cell(genotype, C_prev_prev, C_prev, C, reduction, reduction_prev, False)
        self.cell_normal = Cell(genotype, C_prev_prev, C_prev, C, reduction, reduction_prev, True)

        # Each cell emits `cell.multiplier * C` channels (one C-wide feature
        # per concat entry), so size the fusion input from the cells
        # themselves rather than assuming both equal `multiplier`.
        fused_in = (self.cell_robust.multiplier + self.cell_normal.multiplier) * C
        fused_out = multiplier * C
        self.fusion = nn.Sequential(
            nn.ReLU(inplace=False),
            nn.Conv2d(fused_in, fused_out, kernel_size=1, bias=False),
            nn.BatchNorm2d(fused_out),
            ECALayer(fused_out, k_size=3),
        )

    def forward(self, states, drop_path_prob, cell_index):
        """Evaluate both branches and append the fused feature to ``states``.

        Args:
            states: List of per-cell state lists, as consumed by ``Cell.forward``.
            drop_path_prob: Drop-path probability forwarded to both cells.
            cell_index: 1-based index of this cell, forwarded to both cells.

        Returns:
            A new list containing every entry of ``states`` followed by one
            dict holding the fused feature (``node_index`` is -1).
        """
        # The normal branch runs first; keep this order so any randomness
        # consumed inside the cells (e.g. drop-path) is drawn in the same order.
        states_normal = self.cell_normal(states, drop_path_prob, cell_index)
        states_robust = self.cell_robust(states, drop_path_prob, cell_index)

        robust_feature = states_robust[-1]['feature']
        normal_feature = states_normal[-1]['feature']

        fused_feature = self.fusion(torch.cat([robust_feature, normal_feature], dim=1))
        new_state = {'feature': fused_feature, 'cell_index': cell_index, 'node_index': -1}
        return states + [new_state]

    def wider(self, k):
        """Forward ``wider(k)`` to both inner cells.

        NOTE(review): ``Cell`` as defined in this file has no ``wider``
        method, so this call raises ``AttributeError`` unless one is added
        elsewhere -- confirm whether this method is still needed.
        """
        self.cell_robust.wider(k)
        self.cell_normal.wider(k)


class Cell(nn.Module):
    """One searched cell: two preprocessed inputs, pairwise-summed op nodes, channel concat.

    The op list and concat indices come from ``genotype.reduce`` when
    ``reduction`` is true, otherwise from ``genotype.normal`` when ``normal``
    is true, otherwise from ``genotype.robust``.

    Args:
        genotype: Object exposing ``reduce``/``normal``/``robust`` edge lists
            of ``(op_name, input_index)`` pairs and matching ``*_concat`` lists.
        C_prev_prev: Channels of the feature two cells back.
        C_prev: Channels of the previous cell's feature.
        C: Channel count every node in this cell works at.
        reduction: Whether this is a reduction cell.
        reduction_prev: Whether the previous cell was a reduction cell; selects
            ``FactorizedReduce`` instead of ``ReLUConvBN`` for the first input.
        normal: Chooses between the normal and robust genotype when
            ``reduction`` is false.
    """

    def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev, normal):
        super().__init__()
        self.reduction = reduction

        self.preprocess0 = (
            FactorizedReduce(C_prev_prev, C)
            if reduction_prev
            else ReLUConvBN(C_prev_prev, C, 1, 1, 0)
        )
        self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)

        if reduction:
            edges, concat = genotype.reduce, genotype.reduce_concat
        elif normal:
            edges, concat = genotype.normal, genotype.normal_concat
        else:
            edges, concat = genotype.robust, genotype.robust_concat
        op_names, indices = zip(*edges)
        self._compile(C, op_names, indices, concat, reduction)

    def _compile(self, C, op_names, indices, concat, reduction):
        """Instantiate one op per edge and record the wiring.

        Edges are consumed in consecutive pairs, one pair per intermediate
        node. In a reduction cell, edges reading from either of the two cell
        inputs (index < 2) use stride 2; all others use stride 1.
        """
        assert len(op_names) == len(indices)
        self._steps = len(op_names) // 2
        self._concat = concat
        self.multiplier = len(concat)

        built_ops = [
            OPS[name](C, 2 if reduction and index < 2 else 1, True)
            for name, index in zip(op_names, indices)
        ]
        self._ops = nn.ModuleList(built_ops)
        self._indices = indices

    def forward(self, states, drop_prob, cell_index):
        """Compute every node of the cell and return all of its states.

        Args:
            states: List of per-cell state lists. For ``cell_index == 1`` the
                two inputs are the last two entries of ``states[0]``;
                otherwise they are the last entries of the two preceding
                cells' state lists.
            drop_prob: Drop-path probability; used only while training and
                when greater than zero.
            cell_index: 1-based position of this cell.

        Returns:
            List of dicts (``feature``, ``cell_index``, ``node_index``): the
            two preprocessed inputs, each intermediate node, and finally the
            channel-wise concatenation of the nodes listed in the genotype's
            concat indices.
        """
        if cell_index == 1:
            first_state = states[cell_index - 1]
            source0, source1 = first_state[-2], first_state[-1]
        else:
            source0 = states[cell_index - 2][-1]
            source1 = states[cell_index - 1][-1]
        s0 = self.preprocess0(source0['feature'])
        s1 = self.preprocess1(source1['feature'])

        all_states = [
            {'feature': s0, 'cell_index': cell_index, 'node_index': -1},
            {'feature': s1, 'cell_index': cell_index, 'node_index': -1},
        ]

        for node in range(self._steps):
            first_edge, second_edge = 2 * node, 2 * node + 1
            op_a, op_b = self._ops[first_edge], self._ops[second_edge]
            out_a = op_a(all_states[self._indices[first_edge]]['feature'])
            out_b = op_b(all_states[self._indices[second_edge]]['feature'])
            if self.training and drop_prob > 0.:
                # Identity edges are never dropped.
                if not isinstance(op_a, Identity):
                    out_a = drop_path(out_a, drop_prob)
                if not isinstance(op_b, Identity):
                    out_b = drop_path(out_b, drop_prob)
            all_states.append(
                {'feature': out_a + out_b, 'cell_index': cell_index, 'node_index': node}
            )

        concat_features = [all_states[idx]['feature'] for idx in self._concat]
        all_states.append({
            'feature': torch.cat(concat_features, dim=1),
            'cell_index': cell_index,
            # The output node is numbered one past the step count.
            'node_index': self._steps + 1,
        })
        return all_states


class RobustStem(nn.Module):
    """Stem that blends a deep convolutional path with a 1x1 skip path.

    Main path: 3x3 conv -> 5x5 conv (doubling the width) -> dilated 3x3 conv
    -> grouped 3x3 conv, each followed by BatchNorm and ReLU, then ECA
    channel attention and a 1x1 reduction back to ``C_curr`` channels.
    Skip path: 1x1 conv -> BatchNorm -> ReLU directly on the 3-channel input.
    The two are mixed with a learnable scalar gate ``sigmoid(alpha_param)``,
    which starts at 0.5 because ``alpha_param`` is initialised to zero.

    Args:
        C_curr: Number of output channels.
    """

    def __init__(self, C_curr):
        super().__init__()
        wide = 2 * C_curr

        self.conv1 = nn.Conv2d(3, C_curr, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(C_curr)
        self.relu1 = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(C_curr, wide, kernel_size=5, padding=2, bias=False)
        self.bn2 = nn.BatchNorm2d(wide)
        self.relu2 = nn.ReLU(inplace=True)

        self.dilated_conv = nn.Conv2d(wide, wide, kernel_size=3, padding=2, dilation=2, bias=False)
        self.bn_dilated = nn.BatchNorm2d(wide)
        self.relu_dilated = nn.ReLU(inplace=True)

        self.group_conv = nn.Conv2d(wide, wide, kernel_size=3, padding=1, groups=2, bias=False)
        self.bn_group = nn.BatchNorm2d(wide)
        self.relu_group = nn.ReLU(inplace=True)

        self.eca = ECALayer(wide)
        self.reduce_channels = nn.Sequential(
            nn.Conv2d(wide, C_curr, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_curr),
            nn.ReLU(inplace=True),
        )
        self.alpha_param = nn.Parameter(torch.zeros(1))
        self.skip = nn.Sequential(
            nn.Conv2d(3, C_curr, 1, bias=False),
            nn.BatchNorm2d(C_curr),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Return the gated mix of the main-path and skip-path features."""
        x_skip = self.skip(x)

        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu_dilated(self.bn_dilated(self.dilated_conv(x)))
        x = self.relu_group(self.bn_group(self.group_conv(x)))
        x = self.reduce_channels(self.eca(x))

        # Learnable convex combination of the two paths.
        alpha = torch.sigmoid(self.alpha_param)
        return alpha * x + (1.0 - alpha) * x_skip

class AuxiliaryHeadTinyImageNet(nn.Module):
    """Auxiliary classifier attached to an intermediate feature map.

    Pipeline: ReLU -> adaptive average pool to 2x2 -> 1x1 conv (C_in -> 128)
    -> BatchNorm -> ReLU -> 1x1 conv (128 -> 768) -> BatchNorm -> ReLU ->
    global average pool -> linear classifier.

    Args:
        C_in: Number of channels of the incoming feature map.
        num_classes: Number of output logits.
    """

    def __init__(self, C_in, num_classes):
        super().__init__()
        self.features = nn.Sequential(
            # Deliberately NOT in-place: an in-place ReLU here would overwrite
            # the caller's feature map, which the backbone keeps and reuses
            # after the auxiliary head has run.
            nn.ReLU(inplace=False),
            nn.AdaptiveAvgPool2d(2),
            nn.Conv2d(C_in, 128, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 768, 1, bias=False),
            nn.BatchNorm2d(768),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(1)
        )
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return auxiliary logits of shape (B, num_classes); ``x`` is left unmodified."""
        x = self.features(x)          # (B, 768, 1, 1)
        return self.classifier(torch.flatten(x, 1))


class NetworkCIFAR(nn.Module):
    """Classification network built from a stem and a stack of searched cells.

    The input is normalised with the registered ``_mean``/``_std`` buffers,
    passed through ``RobustStem`` and then through ``layers`` cells. Cells at
    indices ``layers // 3`` and ``2 * layers // 3`` are reduction ``Cell``s
    (channel count doubled); every other position is a ``DualBranchCell``.
    The last feature is globally average-pooled and classified.

    Args:
        C: Base channel count of the first cells.
        num_classes: Number of output logits.
        layers: Number of cells.
        auxiliary: Whether to build an auxiliary head after the cell at index
            ``2 * layers // 3``. When true, ``forward`` returns a tuple.
        genotype: Architecture description forwarded to every cell.
        drop_path_prob: Drop-path probability forwarded to every cell.
        mean: Per-channel mean used to normalise the 3-channel input.
        std: Per-channel standard deviation used to normalise the input.
    """

    def __init__(self, C, num_classes, layers, auxiliary, genotype, drop_path_prob=0,
                 mean=(0.485, 0.456, 0.406),
                 std=(0.229, 0.224, 0.225)
                 ):
        super(NetworkCIFAR, self).__init__()
        self._layers = layers
        self._auxiliary = auxiliary
        self.drop_path_prob = drop_path_prob
        stem_multiplier = 4
        C_curr = stem_multiplier * C
        self.register_buffer('_mean', torch.tensor(mean).view(1, 3, 1, 1))
        self.register_buffer('_std', torch.tensor(std).view(1, 3, 1, 1))
        self.stem = RobustStem(C_curr)

        # Both inputs of the first cell come from the stem.
        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        self.cells = nn.ModuleList()
        reduction_prev = False
        reduction_layers = (layers // 3, 2 * layers // 3)
        for i in range(layers):
            if i in reduction_layers:
                C_curr *= 2
                reduction = True
                cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev, True)
            else:
                reduction = False
                cell = DualBranchCell(genotype, stem_multiplier, C_prev_prev, C_prev, C_curr,
                                      reduction, reduction_prev)

            reduction_prev = reduction
            self.cells.append(cell)
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                # Width of the feature the auxiliary head will read.
                C_to_auxiliary = C_prev

        if auxiliary:
            self.auxiliary_head = AuxiliaryHeadTinyImageNet(C_to_auxiliary, num_classes)

        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(C_prev, num_classes)

    def forward(self, input):
        """Classify a batch of images.

        Args:
            input: Image batch with 3 channels (broadcast against the
                (1, 3, 1, 1) normalisation buffers).

        Returns:
            ``logits`` when the network was built without an auxiliary head;
            otherwise ``(logits, logits_aux)``, where ``logits_aux`` is
            ``None`` unless the module is in training mode.
        """
        input = (input - self._mean) / self._std
        logits_aux = None

        # Run the stem once and feed the same tensor to both cell inputs.
        # Calling it twice on identical input yields the same features but
        # doubles the stem's cost and updates its BatchNorm running statistics
        # twice per step. Sharing one tensor between consumers is already how
        # DualBranchCell feeds its two branches.
        stem_out = self.stem(input)
        state = [{'feature': stem_out, 'cell_index': -1, 'node_index': 0},
                 {'feature': stem_out, 'cell_index': -1, 'node_index': 1}]
        states = [state]
        aux_layer = 2 * self._layers // 3

        for i, cell in enumerate(self.cells):
            updated_states = cell(states, self.drop_path_prob, cell_index=i + 1)
            # Update states: each cell contributes one entry that later cells index into.
            states.append(updated_states)
            if i == aux_layer and self._auxiliary and self.training:
                logits_aux = self.auxiliary_head(states[-1][-1]['feature'])

        out = self.global_pooling(states[-1][-1]['feature'])
        logits = self.classifier(out.view(out.size(0), -1))

        if self._auxiliary:
            return logits, logits_aux
        return logits



