import os
from typing import Any, Type

import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.spectral_norm import SpectralNorm

# this implementation is borrowed from https://github.com/meliketoy/wide-resnet.pytorch/blob/master/networks/wide_resnet.py

# Shorthand for torch.Tensor, used in the forward() type annotations in this module.
T = torch.Tensor


def conv3x3(in_planes: int, out_planes: int, stride: int = 1, ctype: str = "none", c: float = 1.0) -> nn.Module:
    """Build a 3x3 convolution (padding 1, with bias) wrapped in ``SpectralNorm``.

    Args:
        in_planes: number of input channels of the convolution.
        out_planes: number of output channels of the convolution.
        stride: stride of the convolution.
        ctype: forwarded to ``SpectralNorm`` as its ``ctype`` argument.
            NOTE(review): the accepted values are defined in
            ``layers.spectral_norm`` -- confirm there.
        c: forwarded to ``SpectralNorm`` as its ``c`` argument
            (presumably the norm-bound coefficient -- confirm against
            ``layers.spectral_norm``).

    Returns:
        The ``SpectralNorm``-wrapped ``nn.Conv2d`` module.
    """
    conv = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True)
    # Forward the caller's ctype; it was previously hard-coded to "none",
    # which silently discarded whatever the caller requested.
    return SpectralNorm(conv, ctype=ctype, c=c)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1, ctype: str = "none", c: float = 1.0) -> nn.Module:
    """Build a 1x1 convolution (no padding, with bias) wrapped in ``SpectralNorm``.

    Args:
        in_planes: number of input channels of the convolution.
        out_planes: number of output channels of the convolution.
        stride: stride of the convolution.
        ctype: forwarded to ``SpectralNorm`` as its ``ctype`` argument.
            NOTE(review): the accepted values are defined in
            ``layers.spectral_norm`` -- confirm there.
        c: forwarded to ``SpectralNorm`` as its ``c`` argument
            (presumably the norm-bound coefficient -- confirm against
            ``layers.spectral_norm``).

    Returns:
        The ``SpectralNorm``-wrapped ``nn.Conv2d`` module.
    """
    conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)
    # Forward the caller's ctype; it was previously hard-coded to "none",
    # which silently discarded whatever the caller requested.
    return SpectralNorm(conv, ctype=ctype, c=c)


class WideBasic(nn.Module):
    """Residual block: two (BatchNorm -> ReLU -> dropout -> 3x3 conv) steps plus a shortcut.

    The first convolution maps ``in_planes`` to ``planes`` at stride 1; the
    second keeps ``planes`` channels and applies ``stride``. When the block
    changes the stride or the channel count, the shortcut is a strided 1x1
    convolution followed by dropout; otherwise it is an empty
    ``nn.Sequential`` that hands the input back unchanged.

    Args:
        in_planes: channels of the block input.
        planes: channels produced by the block.
        p: dropout probability passed to ``dropout_layer`` as ``p``.
        stride: stride of the second convolution and of the shortcut convolution.
        dropout_layer: callable building a dropout module from ``p=...``.
        ctype: passed through to ``conv3x3`` / ``conv1x1``.
        c: passed through to ``conv3x3`` / ``conv1x1``.
    """

    def __init__(
        self,
        in_planes: int,
        planes: int,
        p: float,
        stride: int = 1,
        dropout_layer: Any = nn.Dropout,
        ctype: str = "none",
        c: float = 1.0
    ):
        super().__init__()
        conv_options = {"ctype": ctype, "c": c}

        # Main branch. One dropout module is shared by both steps in forward().
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, planes, **conv_options)
        self.dropout = dropout_layer(p=p)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride=stride, **conv_options)

        # Shortcut branch: project only when the output shape differs from the input.
        shape_changes = stride != 1 or in_planes != planes
        if shape_changes:
            self.shortcut = nn.Sequential(
                conv1x1(in_planes, planes, stride=stride, **conv_options),
                dropout_layer(p=p),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x: T) -> T:
        """Run the main branch on ``x`` and add the shortcut of ``x`` to it."""
        hidden = F.relu(self.bn1(x))
        hidden = self.conv1(self.dropout(hidden))
        hidden = F.relu(self.bn2(hidden))
        hidden = self.conv2(self.dropout(hidden))
        # In-place accumulation onto the conv2 output.
        hidden += self.shortcut(x)
        return hidden  # type: ignore


class WideResNetSN(nn.Module):
    """Wide ResNet feature extractor whose convolutions are built by ``conv3x3`` / ``WideBasic``.

    The network is a stem (3-channel 3x3 conv + dropout), three stages of
    ``WideBasic`` blocks with strides 1, 2 and 2, and a final BatchNorm.
    ``forward`` returns the flattened, average-pooled features; no
    classification layer is defined in this class.

    Args:
        depth: total depth; must satisfy ``depth = 6n + 4``.
        widen_factor: multiplier applied to the stage widths 16, 32 and 64.
        p: dropout probability used by every dropout module.
        num_classes: accepted for interface compatibility; not used in this class.
        name: base name; must not be ``None``. Combined with the dropout kind
            and ``p`` into ``self.name`` via ``os.path.join``.
        filterwise_dropout: ``True`` selects ``nn.Dropout2d``, ``False`` selects ``nn.Dropout``.
        ctype: stored on the instance and passed to the convolution builders.
        c: stored on the instance and passed to the convolution builders.

    Raises:
        ValueError: if ``name`` is ``None``.
        AssertionError: if ``depth`` is not of the form ``6n + 4``.
    """

    def __init__(
        self,
        depth: int,
        widen_factor: int,
        p: float = 0.1,
        num_classes: int = 10,
        name: str = None,
        filterwise_dropout: bool = True,
        ctype: str = "none",
        c: float = 1.0
    ):
        super().__init__()
        if name is None:
            raise ValueError("name cannot be None for WideResNet")

        # Map the flag to the dropout class and the label used in self.name.
        dropout_choices = {False: (nn.Dropout, "dropout"), True: (nn.Dropout2d, "filterwise-dropout")}
        dropout_layer, drop_layername = dropout_choices[filterwise_dropout]

        # Running input width for _wide_layer; it is updated as blocks are created.
        self.in_planes = 16
        self.name = os.path.join(name, drop_layername, f"p-{p}")
        self.ctype = ctype
        self.c = c

        assert ((depth - 4) % 6 == 0), 'Wide-resnet depth should be 6n+4'
        blocks_per_stage = (depth - 4) // 6

        stem_width = 16
        stage1_width = 16 * widen_factor
        stage2_width = 32 * widen_factor
        stage3_width = 64 * widen_factor

        self.conv1 = nn.Sequential(
            conv3x3(3, stem_width, ctype=ctype, c=c),
            dropout_layer(p=p),
        )
        self.layer1 = self._wide_layer(
            WideBasic, stage1_width, blocks_per_stage, p, stride=1, dropout_layer=dropout_layer
        )
        self.layer2 = self._wide_layer(
            WideBasic, stage2_width, blocks_per_stage, p, stride=2, dropout_layer=dropout_layer
        )
        self.layer3 = self._wide_layer(
            WideBasic, stage3_width, blocks_per_stage, p, stride=2, dropout_layer=dropout_layer
        )
        self.bn1 = nn.BatchNorm2d(stage3_width, momentum=0.9)

        # Channel count of the last stage, exposed for downstream consumers.
        self.h_dim = stage3_width

        # Re-initialise convolution and normalisation parameters.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def _wide_layer(self, block: Type[WideBasic], planes: int, num_blocks: int, dropout_rate: float, stride: int, dropout_layer: Any) -> nn.Module:
        """Stack blocks into one stage; only the first block uses ``stride``.

        Reads ``self.in_planes`` as the input width of each block and sets it
        to ``planes`` after every block is created.
        """
        # The first block carries the requested stride, the remaining ones use stride 1.
        block_strides = [stride, *([1] * (int(num_blocks) - 1))]

        stage_blocks = []
        for block_stride in block_strides:
            new_block = block(
                self.in_planes,
                planes,
                dropout_rate,
                block_stride,
                dropout_layer=dropout_layer,
                ctype=self.ctype,
                c=self.c,
            )
            stage_blocks.append(new_block)
            self.in_planes = planes

        return nn.Sequential(*stage_blocks)

    def forward(self, x: T) -> T:
        """Apply stem and stages, then BatchNorm + ReLU, 8x8 average pooling and flattening."""
        features = x
        for stage in (self.conv1, self.layer1, self.layer2, self.layer3):
            features = stage(features)
        features = F.relu(self.bn1(features))
        # Fixed 8x8 pooling window; flatten everything after the batch dimension.
        features = F.avg_pool2d(features, 8)
        return torch.flatten(features, start_dim=1)  # type: ignore


def wide_sn_resnet28_10_cifar(**kwargs: Any) -> WideResNetSN:
    """Construct a ``WideResNetSN`` with depth 28 and widen factor 10.

    The model name is fixed to ``"WideResNetSN28-10"``; every keyword argument
    is forwarded unchanged to the ``WideResNetSN`` constructor.
    """
    depth, widen_factor = 28, 10
    return WideResNetSN(depth, widen_factor, name="WideResNetSN28-10", **kwargs)
