from __future__ import print_function, division
import math
import gc

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data

from .shared import conv_block, up_conv

class UNet_edge_implicit(nn.Module):
    """
    U-Net style encoder/decoder with an edge branch, an area branch and a
    fused output weighted by a per-pixel softmax attention.

    Backbone paper : https://arxiv.org/abs/1505.04597

    The last decoder block produces ``2 * 64`` channels. The first 64 go to
    the edge branch and the remaining 64 to the area branch. A query computed
    directly from the input image is compared with both branch features
    (channel-wise dot product scaled by ``sqrt(64)``); a softmax over the two
    resulting scores gives the weights used to rescale the branch features
    before the fused output head.

    Args:
        in_ch: channel count of the input image.
        out_ch: channel count of the area prediction and of the fused output.
            The edge prediction always has a single channel.

    NOTE(review): ``conv_block`` and ``up_conv`` come from ``.shared``; the
    element-wise products in ``forward`` assume they keep / restore spatial
    sizes so that skip connections and branch features line up - confirm
    against that module.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super(UNet_edge_implicit, self).__init__()

        base_width = 64
        # Channel widths of the five encoder levels: 64, 128, 256, 512, 1024.
        c1, c2, c3, c4, c5 = [base_width * 2 ** level for level in range(5)]

        # Submodules are created in a fixed order on purpose: the order
        # determines parameter ordering and the sequence of random draws
        # used for initialisation.
        self.Maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.Maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.Maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.Maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder blocks.
        self.Conv1 = conv_block(in_ch, c1)
        self.Conv2 = conv_block(c1, c2)
        self.Conv3 = conv_block(c2, c3)
        self.Conv4 = conv_block(c3, c4)
        self.Conv5 = conv_block(c4, c5)

        # Decoder: each Up_conv takes the skip tensor concatenated with the
        # output of the matching Up block, hence the doubled input width.
        self.Up5 = up_conv(c5, c4)
        self.Up_conv5 = conv_block(c5, c4)

        self.Up4 = up_conv(c4, c3)
        self.Up_conv4 = conv_block(c4, c3)

        self.Up3 = up_conv(c3, c2)
        self.Up_conv3 = conv_block(c3, c2)

        self.Up2 = up_conv(c2, c1)
        # Keeps 2 * c1 channels so the result can be split between branches.
        self.Up_conv2 = conv_block(c2, c2)

        # Branch heads with 9x9 kernels. (A smaller variant using 3x3 blocks
        # followed by 1x1 projections was tried here previously.)
        self.edge1 = conv_block(c1, c1, kernel_size=9)
        self.edge2 = nn.Conv2d(c1, 1, kernel_size=9, padding=4)

        self.area1 = conv_block(c1, c1, kernel_size=9)
        self.area2 = nn.Conv2d(c1, out_ch, kernel_size=9, padding=4)

        # Width of each branch; also the channel index where d2 is split.
        self.filter0 = c1

        # Head applied to the attention-weighted branch features.
        self.output1 = conv_block(c2, c2, kernel_size=9)
        self.output2 = nn.Conv2d(c2, out_ch, kernel_size=9, padding=4)

        # Query features computed straight from the input image.
        self.img_q = conv_block(in_ch, c1, kernel_size=9)

    def forward(self, x):
        """
        Run the network on ``x``.

        Returns:
            A tuple ``(edge, area, fused)`` holding the outputs of the
            ``edge2``, ``area2`` and ``output2`` convolutions. No activation
            is applied to any of them here.
        """
        # ---- encoder -----------------------------------------------------
        enc1 = self.Conv1(x)
        enc2 = self.Conv2(self.Maxpool1(enc1))
        enc3 = self.Conv3(self.Maxpool2(enc2))
        enc4 = self.Conv4(self.Maxpool3(enc3))
        bottom = self.Conv5(self.Maxpool4(enc4))

        # ---- decoder with skip connections (skip tensor goes first) --------
        dec = self.Up_conv5(torch.cat((enc4, self.Up5(bottom)), dim=1))
        dec = self.Up_conv4(torch.cat((enc3, self.Up4(dec)), dim=1))
        dec = self.Up_conv3(torch.cat((enc2, self.Up3(dec)), dim=1))
        dec = self.Up_conv2(torch.cat((enc1, self.Up2(dec)), dim=1))

        # ---- branch heads --------------------------------------------------
        width = self.filter0
        edge_feat = self.edge1(dec[:, :width])
        edge_pred = self.edge2(edge_feat)

        area_feat = self.area1(dec[:, width:])
        area_pred = self.area2(area_feat)

        # ---- attention between the image query and each branch ------------
        query = self.img_q(x)
        scale = math.sqrt(width)
        edge_score = (query * edge_feat).sum(dim=1, keepdim=True) / scale
        area_score = (query * area_feat).sum(dim=1, keepdim=True) / scale
        weights = F.softmax(torch.cat((edge_score, area_score), dim=1), dim=1)

        # Each single-channel weight map broadcasts over its branch channels.
        fused = torch.cat(
            (edge_feat * weights[:, 0:1], area_feat * weights[:, 1:2]),
            dim=1,
        )
        fused = self.output2(self.output1(fused))

        return edge_pred, area_pred, fused
