from __future__ import print_function, division
import math
import gc
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from .shared import conv_block, up_conv
from .sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
class Bottleneck(nn.Module):
    """Residual block made of a 1x1, a 3x3 and a 1x1 convolution.

    The block emits ``planes * expansion`` channels and adds the (optionally
    projected) input back onto the result before the final ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Width of the two inner convolutions.
        stride: Stride of the 3x3 convolution.
        dilation: Dilation (and matching padding) of the 3x3 convolution.
        downsample: Optional module applied to the input to produce the
            residual; needed whenever the input and output shapes differ.
        BatchNorm: Callable building a normalisation layer from a channel
            count. ``None`` falls back to ``nn.BatchNorm2d``.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, BatchNorm=None):
        super(Bottleneck, self).__init__()
        # The signature advertises None as the default, so make it usable
        # instead of failing with "'NoneType' object is not callable".
        if BatchNorm is None:
            BatchNorm = nn.BatchNorm2d
        # Keep the output width tied to the class attribute that
        # ResNet101._make_layer also reads via ``block.expansion``.
        out_planes = planes * self.expansion

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               dilation=dilation, padding=dilation, bias=False)
        self.bn2 = BatchNorm(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = BatchNorm(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation

    def forward(self, x):
        """Return ``relu(f(x) + residual)`` where ``f`` is the conv/BN stack."""
        residual = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += residual
        return self.relu(out)
class ResNet101(nn.Module):
    """ResNet-style encoder that returns the stem output and four stage outputs.

    Both stem convolutions use stride 1, so the first feature map keeps the
    input resolution; each of the four residual stages is then built with
    stride 2.  Stage widths are 32/64/128/256 planes, i.e.
    ``planes * block.expansion`` output channels per stage.

    Args:
        in_ch: Number of channels of the input tensor.
        block: Residual block class.  It must expose an ``expansion``
            attribute and accept
            ``(inplanes, planes, stride, dilation, downsample, BatchNorm)``.
        layers: Number of blocks in each of the four stages.
        output_stride: Only ``16`` is implemented.
        BatchNorm: Callable building a normalisation layer from a channel count.
        pretrained: If True, download the ResNet-101 checkpoint and load the
            entries that fit this model.

    Raises:
        NotImplementedError: If ``output_stride`` is not 16.
    """

    def __init__(self, in_ch, block, layers, output_stride, BatchNorm, pretrained=True):
        super().__init__()
        if output_stride != 16:
            # A dilated output_stride == 8 variant was sketched in an earlier
            # revision but never enabled.
            raise NotImplementedError
        strides = [2, 2, 2, 2]
        dilations = [1, 1, 1, 1]

        # Running channel count consumed and updated by _make_layer.
        self.inplanes = 64

        # Modules
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, 64, kernel_size=7, stride=1, padding=3, bias=False),
            BatchNorm(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
            BatchNorm(64),
            nn.ReLU(inplace=True)
        )
        self.layer1 = self._make_layer(block, 32, layers[0], stride=strides[0], dilation=dilations[0], BatchNorm=BatchNorm)
        self.layer2 = self._make_layer(block, 64, layers[1], stride=strides[1], dilation=dilations[1], BatchNorm=BatchNorm)
        self.layer3 = self._make_layer(block, 128, layers[2], stride=strides[2], dilation=dilations[2], BatchNorm=BatchNorm)
        self.layer4 = self._make_layer(block, 256, layers[3], stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm)
        self._init_weight()

        # Not used by forward(); kept so the attribute stays available.
        self.Up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Aliases used by forward().  They register the same modules under a
        # second name, so state_dict() lists every weight under both prefixes.
        self.conv0_0 = self.conv  # 64
        self.conv1_0 = self.layer1  # 128
        self.conv2_0 = self.layer2  # 256
        self.conv3_0 = self.layer3  # 512
        self.conv4_0 = self.layer4  # 1024
        if pretrained:
            self._load_pretrained_model()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None):
        """Build one stage of ``blocks`` residual blocks and advance ``self.inplanes``."""
        out_planes = planes * block.expansion
        downsample = None
        # The first block needs a projection whenever it changes resolution or width.
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm(out_planes),
            )
        layers = [block(self.inplanes, planes, stride, dilation, downsample, BatchNorm)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, dilation=dilation, BatchNorm=BatchNorm))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return ``(x0, x1, x2, x3, x4)``: the stem output followed by the four stage outputs."""
        x0 = self.conv0_0(x)
        x1 = self.conv1_0(x0)
        x2 = self.conv2_0(x1)
        x3 = self.conv3_0(x2)
        x4 = self.conv4_0(x3)
        return x0, x1, x2, x3, x4

    def _init_weight(self):
        """Initialise convolutions with N(0, sqrt(2 / fan_out)) and nn.BatchNorm2d to weight 1, bias 0."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _load_pretrained_model(self):
        """Download the ResNet-101 checkpoint and load every entry that fits this model."""
        # Imported locally: the file never imports model_zoo at module level,
        # which previously made this method fail with NameError.
        from torch.utils import model_zoo
        pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
        self._load_compatible_weights(pretrain_dict)

    def _load_compatible_weights(self, pretrain_dict):
        """Copy entries of ``pretrain_dict`` whose key and shape match this model.

        Entries with an unknown key or a different shape are skipped, because
        load_state_dict() would otherwise raise on the size mismatch (this
        model's stage widths differ from the checkpoint's).

        Returns:
            Sorted list of the keys that were loaded.
        """
        state_dict = self.state_dict()
        compatible = {
            key: value
            for key, value in pretrain_dict.items()
            if key in state_dict and value.shape == state_dict[key].shape
        }
        state_dict.update(compatible)
        self.load_state_dict(state_dict)
        return sorted(compatible)

def ResNet_101(in_ch, output_stride, BatchNorm=SynchronizedBatchNorm2d, pretrained=False):
    """Build a ``ResNet101`` encoder from ``Bottleneck`` blocks.

    The four stages use 3, 4, 23 and 3 blocks respectively.

    Args:
        in_ch: Number of input channels, forwarded to ``ResNet101``.
        output_stride: Output stride, forwarded to ``ResNet101``.
        BatchNorm: Normalisation layer factory, forwarded to ``ResNet101``.
        pretrained (bool): If True, returns a model pre-trained on ImageNet

    Returns:
        The constructed ``ResNet101`` instance.
    """
    stage_depths = [3, 4, 23, 3]
    return ResNet101(
        in_ch,
        Bottleneck,
        stage_depths,
        output_stride,
        BatchNorm,
        pretrained=pretrained,
    )


class UNet_boost_resnet(nn.Module):
    """U-Net style decoder on a ResNet encoder with certainty-weighted skip fusion.

    Every decoder level produces its own prediction.  The prediction of the
    level below is turned into a per-pixel certainty map ``c``; the encoder
    skip feature is scaled by ``(2 - c)`` and the upsampled decoder feature by
    ``(1 + c)`` before the two are concatenated.

    Args:
        in_ch: Number of input channels passed to the encoder.
        out_ch: Number of channels of every prediction head.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super().__init__()
        n1 = 64
        filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16]

        def head(channels):
            # conv_block down to filters[0] channels, then a 1x1 projection to out_ch.
            return nn.Sequential(
                conv_block(channels, filters[0]),
                nn.Conv2d(filters[0], out_ch,
                          kernel_size=1, stride=1, padding=0),
            )

        self.resnet = ResNet_101(in_ch, 16)
        self.Up5 = up_conv(filters[4], filters[3])
        self.Up_conv5 = conv_block(filters[4], filters[3])
        self.Up4 = up_conv(filters[3], filters[2])
        self.Up_conv4 = conv_block(filters[3], filters[2])
        self.Up3 = up_conv(filters[2], filters[1])
        self.Up_conv3 = conv_block(filters[2], filters[1])
        self.Up2 = up_conv(filters[1], filters[0])
        self.Up_conv2 = conv_block(filters[1], filters[0])
        self.Pred5 = head(filters[4])
        self.Pred4 = head(filters[3])
        self.Pred3 = head(filters[2])
        self.Pred2 = head(filters[1])
        self.Pred1 = head(filters[0])

    @staticmethod
    def _certainty(pred, sig=5):
        """Turn raw predictions into a (B, 1, 2H, 2W) certainty map.

        The class probabilities are bilinearly upsampled by 2, the margin
        between the two most likely classes is taken, and the margin is pushed
        through ``sigmoid((margin - 0.5) * sig)``.

        A single-channel prediction has no second class for ``topk(k=2)``, so
        it is read as a binary logit: the margin is ``|2 * sigmoid(z) - 1|``,
        which equals the two-class softmax margin for logits ``(z, 0)``.
        """
        if pred.shape[1] == 1:
            prob = F.interpolate(pred.sigmoid(), scale_factor=2, mode='bilinear', align_corners=True)
            margin = (2 * prob - 1).abs()
        else:
            prob = F.interpolate(pred.softmax(dim=1), scale_factor=2, mode='bilinear', align_corners=True)
            top2 = prob.topk(k=2, dim=1)[0]
            margin = (top2[:, 0, :, :] - top2[:, 1, :, :]).unsqueeze(dim=1)
        return ((margin - 0.5) * sig).sigmoid()

    def _fuse(self, up, conv, skip, deep, pred):
        """Upsample ``deep``, merge it with ``skip`` weighted by the certainty of ``pred``."""
        c = self._certainty(pred)
        upsampled = up(deep)
        merged = torch.cat((skip * (2 - c), upsampled * (1 + c)), dim=1)
        return conv(merged)

    def forward(self, x):
        """Return the predictions of all five levels, deepest first."""
        e1, e2, e3, e4, e5 = self.resnet(x)

        pred5 = self.Pred5(e5)
        d5 = self._fuse(self.Up5, self.Up_conv5, e4, e5, pred5)

        pred4 = self.Pred4(d5)
        d4 = self._fuse(self.Up4, self.Up_conv4, e3, d5, pred4)

        pred3 = self.Pred3(d4)
        d3 = self._fuse(self.Up3, self.Up_conv3, e2, d4, pred3)

        pred2 = self.Pred2(d3)
        d2 = self._fuse(self.Up2, self.Up_conv2, e1, d3, pred2)

        pred1 = self.Pred1(d2)
        return pred5, pred4, pred3, pred2, pred1



class UNet_boost_resnet_easy(nn.Module):
    """U-Net style decoder on a ResNet encoder with plain skip concatenation.

    Each decoder level upsamples the deeper feature, concatenates it with the
    matching encoder feature, refines the result with a conv block, and emits
    its own prediction.

    Args:
        in_ch: Number of input channels passed to the encoder.
        out_ch: Number of channels of every prediction head.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super().__init__()
        base = 64
        widths = [base * factor for factor in (1, 2, 4, 8, 16)]
        w1, w2, w3, w4, w5 = widths

        def make_head(channels):
            # conv_block down to the base width, then a 1x1 projection to out_ch.
            return nn.Sequential(
                conv_block(channels, w1),
                nn.Conv2d(w1, out_ch, kernel_size=1, stride=1, padding=0),
            )

        self.resnet = ResNet_101(in_ch, 16)

        # Decoder stages, deepest first: upsampler followed by the fusion block.
        self.Up5 = up_conv(w5, w4)
        self.Up_conv5 = conv_block(w5, w4)
        self.Up4 = up_conv(w4, w3)
        self.Up_conv4 = conv_block(w4, w3)
        self.Up3 = up_conv(w3, w2)
        self.Up_conv3 = conv_block(w3, w2)
        self.Up2 = up_conv(w2, w1)
        self.Up_conv2 = conv_block(w2, w1)

        # One prediction head per level.
        self.Pred5 = make_head(w5)
        self.Pred4 = make_head(w4)
        self.Pred3 = make_head(w3)
        self.Pred2 = make_head(w2)
        self.Pred1 = make_head(w1)

    def forward(self, x):
        """Return the predictions of all five levels, deepest first."""
        feat1, feat2, feat3, feat4, feat5 = self.resnet(x)

        out5 = self.Pred5(feat5)

        dec5 = self.Up_conv5(torch.cat((feat4, self.Up5(feat5)), dim=1))
        out4 = self.Pred4(dec5)

        dec4 = self.Up_conv4(torch.cat((feat3, self.Up4(dec5)), dim=1))
        out3 = self.Pred3(dec4)

        dec3 = self.Up_conv3(torch.cat((feat2, self.Up3(dec4)), dim=1))
        out2 = self.Pred2(dec3)

        dec2 = self.Up_conv2(torch.cat((feat1, self.Up2(dec3)), dim=1))
        out1 = self.Pred1(dec2)

        return out5, out4, out3, out2, out1