from __future__ import print_function, division
import math
import gc

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data

from .shared import conv_block, up_conv
from .sync_batchnorm.batchnorm import SynchronizedBatchNorm2d

class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a skip path.

    Every convolution is bias-free and followed by a normalization layer. The
    3x3 convolution carries the stride and dilation; the last 1x1 convolution
    widens the output to ``planes * expansion`` channels. The block input
    (optionally passed through ``downsample``) is added before the final ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Number of channels of the two inner convolutions.
        stride: Stride of the 3x3 convolution.
        dilation: Dilation (and padding) of the 3x3 convolution.
        downsample: Optional module applied to the input so that it matches
            the shape of the main path before the addition.
        BatchNorm: Callable ``BatchNorm(num_channels)`` returning the
            normalization layer. ``None`` selects ``nn.BatchNorm2d``.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, BatchNorm=None):
        super(Bottleneck, self).__init__()
        if BatchNorm is None:
            # The bare default used to be called as ``None(planes)`` and crash.
            BatchNorm = nn.BatchNorm2d
        out_planes = planes * self.expansion

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm(planes)
        # padding == dilation keeps the spatial size unchanged when stride is 1.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               dilation=dilation, padding=dilation, bias=False)
        self.bn2 = BatchNorm(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = BatchNorm(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            # Project the input so it can be added to the main path.
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet101(nn.Module):
    """ResNet-style encoder that returns the feature map of every stage.

    The stem is two stride-1 convolutions producing 64 channels. It is
    followed by four residual stages built from ``block`` with 32, 64, 128
    and 256 planes; each stage is created with stride 2. With a block whose
    ``expansion`` is 4 the stages output 128, 256, 512 and 1024 channels.

    Args:
        in_ch: Number of channels of the input image.
        block: Residual block class. It must expose an ``expansion`` attribute
            and accept ``(inplanes, planes, stride, dilation, downsample,
            BatchNorm)``.
        layers: Sequence of four ints, the number of blocks per stage.
        output_stride: Only ``16`` is implemented.
        BatchNorm: Callable ``BatchNorm(num_channels)`` returning a
            normalization layer.
        pretrained: If True, download the torchvision ResNet-101 checkpoint
            and copy every tensor whose name and shape match this network.

    Raises:
        NotImplementedError: If ``output_stride`` is not 16.
    """

    def __init__(self, in_ch, block, layers, output_stride, BatchNorm, pretrained=True):
        super().__init__()
        # Running channel count; read and advanced by _make_layer.
        self.inplanes = 64
        if output_stride == 16:
            strides = [2, 2, 2, 2]
            dilations = [1, 1, 1, 1]
        else:
            raise NotImplementedError(
                'output_stride={!r} is not supported; only 16 is implemented'.format(output_stride))

        # Modules
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, 64, kernel_size=7, stride=1, padding=3, bias=False),
            BatchNorm(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
            BatchNorm(64),
            nn.ReLU(inplace=True)
        )

        self.layer1 = self._make_layer(block, 32, layers[0], stride=strides[0], dilation=dilations[0], BatchNorm=BatchNorm)
        self.layer2 = self._make_layer(block, 64, layers[1], stride=strides[1], dilation=dilations[1], BatchNorm=BatchNorm)
        self.layer3 = self._make_layer(block, 128, layers[2], stride=strides[2], dilation=dilations[2], BatchNorm=BatchNorm)
        self.layer4 = self._make_layer(block, 256, layers[3], stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm)
        self._init_weight()

        self.Up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Aliases of the modules above (same objects, so their parameters also
        # appear under these names in state_dict). The channel counts in the
        # comments hold for a block with expansion 4.
        self.conv0_0 = self.conv  # 64
        self.conv1_0 = self.layer1  # 128
        self.conv2_0 = self.layer2  # 256
        self.conv3_0 = self.layer3  # 512
        self.conv4_0 = self.layer4  # 1024

        if pretrained:
            self._load_pretrained_model()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None):
        """Build one stage of ``blocks`` residual blocks and advance ``self.inplanes``.

        Only the first block receives ``stride`` and, when the shape changes,
        a 1x1-convolution shortcut; the remaining blocks keep the resolution.
        """
        if BatchNorm is None:
            # A bare default would otherwise be called as ``None(...)`` below.
            BatchNorm = nn.BatchNorm2d
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            # Shortcut projection so the residual matches the block output.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm(out_planes),
            )

        layers = [block(self.inplanes, planes, stride, dilation, downsample, BatchNorm)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, dilation=dilation, BatchNorm=BatchNorm))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return ``(x0, x1, x2, x3, x4)``: the stem output and the four stage outputs."""
        x0 = self.conv0_0(x)
        x1 = self.conv1_0(x0)
        x2 = self.conv2_0(x1)
        x3 = self.conv3_0(x2)
        x4 = self.conv4_0(x3)

        return x0, x1, x2, x3, x4

    def _init_weight(self):
        """Initialise convolutions with N(0, sqrt(2 / fan_out)) and reset nn.BatchNorm2d layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                # NOTE(review): normalization classes that do not subclass
                # nn.BatchNorm2d are not touched here and keep their own
                # initialisation - confirm that is intended.
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    @staticmethod
    def _filter_compatible(pretrain_dict, state_dict):
        """Return the entries of ``pretrain_dict`` whose key and shape both match ``state_dict``."""
        return {
            key: value
            for key, value in pretrain_dict.items()
            if key in state_dict and value.shape == state_dict[key].shape
        }

    def _load_pretrained_model(self):
        """Download the torchvision ResNet-101 weights and load the compatible subset."""
        # Imported here because the module is not imported at the top of the file.
        from torch.utils import model_zoo

        pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
        state_dict = self.state_dict()
        # Matching on the name alone is not enough: the stage widths of this
        # network differ from the checkpoint, and load_state_dict raises on
        # any tensor whose shape differs.
        state_dict.update(self._filter_compatible(pretrain_dict, state_dict))
        self.load_state_dict(state_dict)


def ResNet_101(in_ch, output_stride, BatchNorm=SynchronizedBatchNorm2d, pretrained=False):
    """Build a ``ResNet101`` encoder made of ``Bottleneck`` blocks.

    The four stages contain 3, 4, 23 and 3 blocks respectively.

    Args:
        in_ch: Number of channels of the input image.
        output_stride: Forwarded unchanged to ``ResNet101``.
        BatchNorm: Normalization layer factory forwarded to ``ResNet101``.
        pretrained (bool): Forwarded to ``ResNet101``; when True the model
            loads weights from the ImageNet ResNet-101 checkpoint.

    Returns:
        The constructed ``ResNet101`` instance.
    """
    blocks_per_stage = [3, 4, 23, 3]
    return ResNet101(
        in_ch,
        Bottleneck,
        blocks_per_stage,
        output_stride,
        BatchNorm,
        pretrained=pretrained,
    )


class ResUNetScale(nn.Module):
    """U-Net style decoder on a ResNet encoder with certainty-gated skip connections.

    The encoder yields five feature maps ``e1..e5``. At every decoder level a
    prediction head is applied to the current features, a per-pixel certainty
    ``c`` is derived from that prediction, and the encoder skip feature and the
    upsampled decoder feature are re-weighted by ``(2 - c)`` and ``(1 + c)``
    before being concatenated. A higher certainty therefore shifts weight from
    the skip connection to the decoder path.

    Args:
        in_ch: Number of channels of the input image.
        num_classes: Number of output channels of every prediction head.
        sig: Slope of the sigmoid that maps the probability margin to the
            certainty gate.

    NOTE(review): ``up_conv`` is assumed to double the spatial resolution and
    ``conv_block(in, out)`` to preserve it - confirm against ``.shared``.
    """

    def __init__(self, in_ch=3, num_classes=1, sig=5):
        super().__init__()

        n1 = 64
        filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16]

        self.sig = sig
        self.resnet = ResNet_101(in_ch, 16)

        self.Up5 = up_conv(filters[4], filters[3])
        self.Up_conv5 = conv_block(filters[4], filters[3])

        self.Up4 = up_conv(filters[3], filters[2])
        self.Up_conv4 = conv_block(filters[3], filters[2])

        self.Up3 = up_conv(filters[2], filters[1])
        self.Up_conv3 = conv_block(filters[2], filters[1])

        self.Up2 = up_conv(filters[1], filters[0])
        self.Up_conv2 = conv_block(filters[1], filters[0])

        # One prediction head per decoder level, deepest first.
        self.Pred5 = self._prediction_head(filters[4], filters[0], num_classes)
        self.Pred4 = self._prediction_head(filters[3], filters[0], num_classes)
        self.Pred3 = self._prediction_head(filters[2], filters[0], num_classes)
        self.Pred2 = self._prediction_head(filters[1], filters[0], num_classes)
        self.Pred1 = self._prediction_head(filters[0], filters[0], num_classes)

    @staticmethod
    def _prediction_head(in_channels, mid_channels, num_classes):
        """Return a ``conv_block`` followed by a 1x1 convolution to ``num_classes`` channels."""
        return nn.Sequential(
            conv_block(in_channels, mid_channels),
            nn.Conv2d(mid_channels, num_classes,
                      kernel_size=1, stride=1, padding=0),
        )

    @staticmethod
    def _certainty_gate(pred, sig):
        """Map a prediction to a one-channel certainty gate at twice its resolution.

        The gate is ``sigmoid((margin - 0.5) * sig)`` where ``margin`` is the
        difference between the two largest class probabilities after 2x
        bilinear upsampling.
        """
        if pred.size(1) == 1:
            # A single channel has no second class for topk(k=2) to select.
            # Read it as a binary logit: the two class probabilities are p and
            # 1 - p, so their margin is |2p - 1|.
            prob = F.interpolate(pred.sigmoid(), scale_factor=2, mode='bilinear', align_corners=True)
            margin = (2 * prob - 1).abs()
        else:
            probs = F.interpolate(pred.softmax(dim=1), scale_factor=2, mode='bilinear', align_corners=True)
            top2 = probs.topk(k=2, dim=1)[0]
            margin = (top2[:, 0, :, :] - top2[:, 1, :, :]).unsqueeze(dim=1)
        return ((margin - 0.5) * sig).sigmoid()

    def _gated_concat(self, skip, upsampled, pred):
        """Concatenate ``skip`` and ``upsampled`` along channels, re-weighted by the certainty of ``pred``."""
        c = self._certainty_gate(pred, self.sig)
        return torch.cat((skip * (2 - c), upsampled * (1 + c)), dim=1)

    def forward(self, x):
        """Return ``(pred1, pred2, pred3, pred4, pred5)``, the head outputs from the
        shallowest to the deepest decoder level; no activation is applied."""
        e1, e2, e3, e4, e5 = self.resnet(x)

        pred5 = self.Pred5(e5)
        d5 = self._gated_concat(e4, self.Up5(e5), pred5)
        d5 = self.Up_conv5(d5)

        pred4 = self.Pred4(d5)
        d4 = self._gated_concat(e3, self.Up4(d5), pred4)
        d4 = self.Up_conv4(d4)

        pred3 = self.Pred3(d4)
        d3 = self._gated_concat(e2, self.Up3(d4), pred3)
        d3 = self.Up_conv3(d3)

        pred2 = self.Pred2(d3)
        d2 = self._gated_concat(e1, self.Up2(d3), pred2)
        d2 = self.Up_conv2(d2)

        pred1 = self.Pred1(d2)

        return pred1, pred2, pred3, pred4, pred5
