
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import torchvision
from itertools import chain

from network.modules.resnet_hacks import modify_resnet_model
from network.modules.identity import Identity
from collections import OrderedDict

# Turn on autograd anomaly detection for the whole process as soon as this
# module is imported.
# NOTE(review): PyTorch documents anomaly detection as a debugging aid that
# slows down execution -- confirm it is meant to stay enabled outside debugging.
torch.autograd.set_detect_anomaly(True)

def freeze_(model):
    """Disable gradient tracking, in place, for every parameter of ``model``.

    Only ``requires_grad`` is changed. No module is switched to eval mode, so
    BatchNorm behaviour is not controlled by this function.
    """
    for param in model.parameters():
        param.requires_grad_(False)
        
def reparametrize(mu, logvar, factor=0.2):
    """Return ``mu`` perturbed by scaled Gaussian noise.

    The standard deviation is ``exp(logvar / 2)``. The noise is drawn from a
    standard normal into a fresh tensor with the same size, dtype and device
    as the standard deviation, and is not tracked by autograd.

    Args:
        mu: Mean tensor.
        logvar: Log-variance tensor.
        factor: Multiplier applied to the noise term.

    Returns:
        ``mu + factor * std * eps``.
    """
    sigma = torch.exp(logvar / 2)
    noise = sigma.new_empty(sigma.size()).normal_()
    return mu + factor * sigma * noise

class AlexNetCaffe(nn.Module):
    """Caffe-style AlexNet trunk: convolutional features followed by fc6/fc7.

    The network stops at ``fc7`` and therefore outputs 4096 features per
    sample; no final classification layer is defined in this class.

    Args:
        n_classes: Accepted for interface compatibility only. It is not used
            because this module defines no classification layer.
        dropout: When true (the default), ``drop6`` and ``drop7`` are
            ``nn.Dropout`` layers. When false they are ``nn.Identity``, so the
            classifier is deterministic in training mode. Neither layer type
            has parameters, so the state-dict keys are the same either way.
    """

    def __init__(self, n_classes=100, dropout=True):
        super(AlexNetCaffe, self).__init__()
        self.features = nn.Sequential(OrderedDict([
            ("conv1", nn.Conv2d(3, 96, kernel_size=11, stride=4)),
            ("relu1", nn.ReLU(inplace=True)),
            ("pool1", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
            ("norm1", nn.LocalResponseNorm(5, 1.e-4, 0.75)),
            ("conv2", nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=2)),
            ("relu2", nn.ReLU(inplace=True)),
            ("pool2", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
            ("norm2", nn.LocalResponseNorm(5, 1.e-4, 0.75)),
            ("conv3", nn.Conv2d(256, 384, kernel_size=3, padding=1)),
            ("relu3", nn.ReLU(inplace=True)),
            ("conv4", nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=2)),
            ("relu4", nn.ReLU(inplace=True)),
            ("conv5", nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=2)),
            ("relu5", nn.ReLU(inplace=True)),
            ("pool5", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
        ]))
        # fc6 expects the flattened feature map to hold 256 * 6 * 6 values.
        self.classifier = nn.Sequential(OrderedDict([
            ("fc6", nn.Linear(256 * 6 * 6, 4096)),
            ("relu6", nn.ReLU(inplace=True)),
            ("drop6", nn.Dropout() if dropout else nn.Identity()),
            ("fc7", nn.Linear(4096, 4096)),
            ("relu7", nn.ReLU(inplace=True)),
            ("drop7", nn.Dropout() if dropout else nn.Identity()),
        ]))

    def forward(self, x, train=True):
        """Return the fc7 features for a batch of images.

        Args:
            x: Input batch; it is multiplied by 57.6 before the convolutions.
            train: Unused; kept so existing callers keep working.

        Returns:
            Tensor with one 4096-element row per input sample.
        """
        # 57.6 is the magic number needed to bring torch data back to the
        # range of caffe data, based on used std.
        x = self.features(x * 57.6)
        x = x.view(x.size(0), -1)
        return self.classifier(x)

class ConvNet(nn.Module):
    """AlexNet-based encoder with a classification head and a projection head.

    The encoder is an ``AlexNetCaffe`` instance whose weights are loaded from a
    checkpoint file located relative to this module. Both heads consume the
    encoder's 4096-dimensional output.

    Args:
        projection_dim: Width of every layer in the projection head.
        output_dim: Number of outputs of the classification head.
        imdim: Unused; kept for interface compatibility.
        oracle: Stored as ``self.oracle``; not otherwise used in this class.
    """

    _MODES = ('test', 'train', 'prof')

    def __init__(self, projection_dim, output_dim, imdim=3, oracle=False):
        super(ConvNet, self).__init__()
        self.pretrained = None
        self.encoder = AlexNetCaffe()
        self.projection_dim = projection_dim
        self.output_dim = output_dim
        self.oracle = oracle
        # Filled by the forward hooks registered through ``get_hook``.
        self.selected_out = OrderedDict()
        # Handles of the registered forward hooks.
        self.fhooks = []
        self.buffer_features = 3024
        self.encoder_features = 4096
        self.n_features = 256 * 6 * 6

        # Initialise the encoder's linear layers; any layer present in the
        # checkpoint loaded below is subsequently overwritten.
        for m in self.encoder.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, .1)
                nn.init.constant_(m.bias, 0.)

        # The file name below is a placeholder that must be edited to point at
        # the AlexNet checkpoint. ``map_location="cpu"`` lets a checkpoint
        # saved from a GPU load on a CPU-only machine; ``load_state_dict``
        # copies the values into the encoder's own parameters afterwards.
        state_dict = torch.load(
            os.path.join(os.path.dirname(__file__), "$HERE GOES THE PATH TO THE alexnet_caffe.pth.tar$"),
            map_location="cpu",
        )  #####HERE!
        # The encoder has no fc8 layer; drop those entries if they are present.
        for key in ("classifier.fc8.weight", "classifier.fc8.bias"):
            state_dict.pop(key, None)
        self.encoder.load_state_dict(state_dict, strict=False)

        self.cls_head = nn.Sequential(
            nn.Linear(self.encoder_features, self.encoder_features),
            nn.ReLU(),
            nn.Linear(self.encoder_features, self.encoder_features),
            nn.ReLU(),
            nn.Linear(self.encoder_features, self.output_dim))

        self.pro_head = nn.Sequential(
            nn.Linear(self.encoder_features, self.projection_dim, bias=False),
            nn.BatchNorm1d(self.projection_dim),
            nn.ReLU(),
            nn.Linear(self.projection_dim, self.projection_dim, bias=False),
            nn.BatchNorm1d(self.projection_dim),
            nn.ReLU(),
            nn.Linear(self.projection_dim, self.projection_dim, bias=False)
            )

    def get_hook(self):
        """Register a forward hook on every direct child of the encoder.

        Each hook stores the child's output in ``self.selected_out`` under the
        child's name. Hooks registered by an earlier call are removed first,
        so calling this repeatedly does not stack duplicate hooks.
        """
        for handle in self.fhooks:
            handle.remove()
        self.fhooks.clear()
        for name, child in self.encoder.named_children():
            self.fhooks.append(child.register_forward_hook(self.forward_hook(name)))

    def forward_hook(self, layer_name):
        """Return a hook that records a module's output under ``layer_name``."""
        def hook(module, input, output):
            self.selected_out[layer_name] = output
        return hook

    def freeze_bn(self):
        """Freeze every ``nn.BatchNorm2d`` module contained in this network.

        The affine parameters (when present) stop requiring gradients and the
        module is switched to eval mode. Only ``BatchNorm2d`` is matched, so
        the ``BatchNorm1d`` layers of the projection head are left untouched.
        """
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                # weight/bias are None when the layer was built with
                # affine=False, so test the value rather than the attribute.
                if getattr(module, 'weight', None) is not None:
                    module.weight.requires_grad_(False)
                if getattr(module, 'bias', None) is not None:
                    module.bias.requires_grad_(False)
                module.eval()

    def forward(self, x, mode='test'):
        """Encode ``x`` and run the heads selected by ``mode``.

        Args:
            x: Input batch passed to the encoder.
            mode: ``'test'`` returns the classification output ``p``;
                ``'train'`` returns ``(p, z)`` where ``z`` is the projection
                head output; ``'prof'`` returns
                ``(p, z, encoded, self.selected_out)``.

        Raises:
            ValueError: If ``mode`` is not one of the three supported values.
        """
        if mode not in self._MODES:
            raise ValueError(
                "mode must be one of %r, got %r" % (self._MODES, mode))

        encoded = self.encoder(x)
        p = self.cls_head(encoded)
        if mode == 'test':
            return p

        z = self.pro_head(encoded)
        if mode == 'train':
            return p, z
        return p, z, encoded, self.selected_out
        