import torch
import numpy as np
from torch.autograd import Variable
import torch.nn as nn
import torch.optim
import json
import torch.utils.data.sampler
import os
import glob
import random
import time
import numpy as np
import configs
import backbone
import data.feature_loader as feat_loader
from data.datamgr import SetDataManager, SimpleDataManager,SimpleDataManagerWithoutNormalize
from methods.baselinetrain import BaselineTrain
# from methods.baselinefinetune import BaselineFinetune
from methods.protonet import ProtoNet
from methods.matchingnet import MatchingNet
from methods.relationnet import RelationNet
from methods.maml import MAML
from io_utils import model_dict, parse_args, get_resume_file, get_best_file, get_assigned_file
import pdb
from methods.meta_template import MetaTemplate
from PIL import Image
import torch.nn.functional as F
from torchvision import utils


class BaselineFinetune(MetaTemplate):
    """Few-shot classifier that scores queries against calibrated class prototypes.

    Support features go through an element-wise power transform, are averaged
    with their nearest base-class means (``distribution_calibration``) and are
    L2-normalised into one prototype per class.  Queries are scored with a
    log-softmax over negative Euclidean distances to those prototypes.
    ``set_forward_adv`` additionally attacks the query images with an
    L-infinity PGD before scoring them.

    ``get_base_means`` has to be called once before any ``set_forward*`` call.
    ``self.n_query`` must be assigned by the caller, and ``self.forward``,
    ``self.parse_feature``, ``self.feat_dim``, ``self.n_way`` and
    ``self.n_support`` are expected to be provided by ``MetaTemplate``.
    """

    # Exponent of the element-wise power transform applied to features.
    # NOTE(review): a fractional exponent presumably relies on non-negative
    # backbone outputs (negative values would give NaN) -- confirm.
    _POWER_BETA = 0.5
    # Number of nearest base-class means mixed into every support feature.
    _NUM_NEIGHBOURS = 2

    def __init__(self, model_func, n_way, n_support,dataset, loss_type="softmax"):
        """Store the episode layout, dataset name and PGD attack settings.

        Args:
            model_func: backbone constructor, forwarded to ``MetaTemplate``.
            n_way: number of classes per episode.
            n_support: number of support samples per class.
            dataset: dataset name; ``get_base_means`` supports
                ``'miniImagenet'`` and ``'CUB'``.
            loss_type: stored on the instance; not read by this class.
        """
        super(BaselineFinetune, self).__init__(model_func, n_way, n_support)
        self.loss_type = loss_type
        self.dataset = dataset

        # PGD settings used by set_forward_adaptation_adv.
        self.k = 20               # number of attack iterations
        self.a = 2.0 / 255        # step size per iteration
        self.epsilon = 8.0 / 255  # L-infinity radius of the perturbation

    def get_base_means(self, base_data_loader):
        """Compute the mean feature of every base class.

        The result is stored as a ``(num_classes, feat_dim)`` numpy array in
        ``self.base_means_array``.  For CUB the raw labels are first mapped to
        consecutive indices; the mapping is kept in ``self.label_ids`` /
        ``self.label_map``.

        Args:
            base_data_loader: iterable of ``(images, labels)`` batches.  For
                CUB, ``base_data_loader.dataset.meta['image_labels']`` is read
                as well.

        Raises:
            ValueError: if ``self.dataset`` is not a supported dataset.
        """
        if self.dataset == 'miniImagenet':
            num_classes = 64
            label_map = None
        elif self.dataset == 'CUB':
            num_classes = 100
            # np.unique returns sorted labels, so a label's position is its index.
            self.label_ids = np.unique(base_data_loader.dataset.meta['image_labels'])
            self.label_map = {
                raw.item(): index for index, raw in enumerate(self.label_ids)
            }
            label_map = self.label_map
        else:
            raise ValueError(
                "get_base_means does not know the number of base classes "
                "for dataset %r" % (self.dataset,))

        # Gather per-class feature chunks batch by batch and concatenate once
        # at the end, instead of growing a tensor by one row per sample.
        chunks = {cls: [] for cls in range(num_classes)}
        with torch.no_grad():
            for x, y in base_data_loader:
                if label_map is not None:
                    y = torch.tensor(
                        [label_map[raw] for raw in y.tolist()], dtype=torch.long)
                features = self.forward(x.cuda())
                for label in torch.unique(y).tolist():
                    mask = (y == label).to(features.device)
                    chunks[label].append(features[mask])

            base_means = torch.zeros(num_classes, self.feat_dim).cuda()
            for cls in range(num_classes):
                if chunks[cls]:
                    base_means[cls] = torch.cat(chunks[cls], dim=0).mean(dim=0)
                else:
                    # Same value a mean over zero samples would produce.
                    base_means[cls] = float('nan')
        self.base_means_array = base_means.cpu().numpy()

    def distribution_calibration(self, query, m=2):
        """Average a feature with its ``m`` nearest base-class means.

        Args:
            query: 1-D numpy feature vector.
            m: number of nearest base-class means (Euclidean distance) to mix
                in; with ``m <= 0`` the feature is returned unchanged.

        Returns:
            A 1-D ``torch.Tensor`` on the CUDA device.
        """
        if m > 0:
            # Broadcasting subtracts the query from every base mean.
            dist = np.linalg.norm(self.base_means_array - query, axis=1)
            nearest = np.argsort(dist)[:m]
            stacked = np.concatenate(
                [self.base_means_array[nearest], query[np.newaxis, :]])
            calibrated_mean = np.mean(stacked, axis=0)
        else:
            calibrated_mean = query

        return torch.Tensor(calibrated_mean).cuda()

    def _embed_episode(self, x):
        """Run the backbone on a 5-D episode tensor, keeping the first two dims.

        ``x`` is flattened over its first two dimensions before the forward
        pass; the features are reshaped back to ``(x.size(0), -1, feat)``.
        """
        n_way = x.size(0)
        flat = x.contiguous().view(
            n_way * x.size(1), x.size(2), x.size(3), x.size(4))
        features = self.forward(flat)
        return features.view(n_way, features.size(0) // n_way, features.size(1))

    def _split_and_transform(self, features):
        """Split episode features into flat, power-transformed support/query sets."""
        z_support, z_query = self.parse_feature(features, is_feature=True)
        z_support = z_support.contiguous().view(self.n_way * self.n_support, -1)
        z_query = z_query.contiguous().view(self.n_way * self.n_query, -1)
        return (torch.pow(z_support, self._POWER_BETA),
                torch.pow(z_query, self._POWER_BETA))

    def _calibrated_prototypes(self, z_support):
        """Return one L2-normalised, calibrated prototype per class.

        ``z_support`` holds the support features ordered class by class
        (``n_support`` consecutive rows per class).
        """
        # detach(): calibration runs in numpy, which rejects tensors that
        # still require grad.
        calibrated = torch.stack([
            self.distribution_calibration(
                feat.detach().cpu().numpy(), m=self._NUM_NEIGHBOURS)
            for feat in z_support
        ])
        prototypes = calibrated.view(self.n_way, self.n_support, -1).mean(dim=1)
        return F.normalize(prototypes, dim=1)

    @staticmethod
    def _log_scores(z_query, prototypes):
        """Log-softmax over negative Euclidean distances to the prototypes."""
        z_query = F.normalize(z_query, dim=1)
        return F.log_softmax(-torch.cdist(z_query, prototypes, p=2), dim=1)

    def set_forward(self, x, is_feature=True):
        """Embed an image episode and classify its query samples.

        Args:
            x: 5-D tensor; the first two dimensions are (class, sample).
            is_feature: forwarded to ``set_forward_adaptation``.

        Returns:
            Log-probabilities of shape ``(n_way * n_query, n_way)``.
        """
        # Baseline always do adaptation
        return self.set_forward_adaptation(self._embed_episode(x), is_feature)

    def set_forward_adv(self, x_orig, is_feature=True):
        """Embed an image episode and classify PGD-perturbed query images.

        Args:
            x_orig: 5-D tensor; the first two dimensions are (class, sample).
            is_feature: forwarded to ``set_forward_adaptation_adv``.

        Returns:
            Log-probabilities of the adversarial queries, shape
            ``(n_way * n_query, n_way)``.
        """
        # No gradient flows through these features: the prototypes are built
        # via numpy and the query features are recomputed inside the attack.
        with torch.no_grad():
            features = self._embed_episode(x_orig)
        return self.set_forward_adaptation_adv(features, x_orig, is_feature)

    def set_forward_adaptation(self, x, is_feature=True):
        """Classify query features against the calibrated support prototypes.

        Args:
            x: episode features accepted by ``parse_feature``.
            is_feature: unused; the input is always treated as features.

        Returns:
            Log-probabilities of shape ``(n_way * n_query, n_way)``.
        """
        z_support, z_query = self._split_and_transform(x)
        prototypes = self._calibrated_prototypes(z_support)
        return self._log_scores(z_query, prototypes)

    def set_forward_adaptation_adv(self, feature, x_orig,is_feature=True):
        """Attack the query images with PGD and score the adversarial result.

        The prototypes are built from the clean support features in
        ``feature``.  The query images taken from ``x_orig`` are perturbed for
        ``self.k`` steps of size ``self.a`` inside an L-infinity ball of radius
        ``self.epsilon``, maximising the loss of the true class.

        Args:
            feature: episode features accepted by ``parse_feature``.
            x_orig: 5-D image episode; the first two dimensions are
                (class, sample), with the support samples first.
            is_feature: unused; ``feature`` is always treated as features.

        Returns:
            Log-probabilities of the adversarial queries, shape
            ``(n_way * n_query, n_way)``, computed without gradient tracking.
        """
        z_support, _ = self._split_and_transform(feature)
        prototypes = self._calibrated_prototypes(z_support)

        # True class of every query row; always int64 as torch.gather needs.
        y = torch.arange(self.n_way).repeat_interleave(self.n_query).cuda()

        self.eval()
        x_query = x_orig[:, self.n_support:]
        x_query = x_query.contiguous().view(
            x_query.size(0) * x_query.size(1),
            x_query.size(2), x_query.size(3), x_query.size(4))
        x_nat = x_query.detach().clone().cuda()

        # Random start inside the epsilon ball (drawn on the CPU generator).
        noise = torch.empty(x_nat.shape).uniform_(-self.epsilon, self.epsilon)
        x = x_nat + noise.cuda()

        for _ in range(self.k):
            x.requires_grad_(True)
            z_query = torch.pow(self.forward(x), self._POWER_BETA)
            scores = self._log_scores(z_query, prototypes)
            loss = -torch.gather(scores, 1, y.unsqueeze(1)).mean()
            grad = torch.autograd.grad(loss, x)[0]

            # Signed-gradient ascent step, then projection onto the epsilon
            # ball around the clean images.
            x = x.detach() + self.a * torch.sign(grad)
            x = torch.max(torch.min(x, x_nat + self.epsilon), x_nat - self.epsilon)
            # NOTE(review): assumes pixel values live in [0, 1]; confirm this
            # holds when the data manager normalises its inputs.
            x = torch.clamp(x, 0, 1)

            torch.cuda.empty_cache()

        # Score the final adversarial images, so that the last update counts
        # and the method also works when self.k == 0.
        with torch.no_grad():
            z_query = torch.pow(self.forward(x), self._POWER_BETA)
            adv_scores = self._log_scores(z_query, prototypes)
        return adv_scores

    def set_forward_loss(self, x):
        """Not supported: this model is evaluation-only."""
        raise ValueError(
            'Baseline predict on pretrained feature and do not support finetune backbone')




def feature_evaluation(cl_data_file, model, n_way=5, n_support=5, n_query=15, adaptation=False,adv_eval = False):
    """Sample one random episode and return the model's query accuracy in percent.

    Args:
        cl_data_file: mapping from class label to a list of tensors; every
            class needs at least ``n_support + n_query`` entries.
        model: object exposing ``set_forward``, ``set_forward_adv`` and
            ``set_forward_adaptation``; its ``n_query`` attribute is
            overwritten with ``n_query``.
        n_way: number of classes drawn for the episode.
        n_support: support samples per class.
        n_query: query samples per class.
        adaptation: if true, call ``model.set_forward_adaptation``.
        adv_eval: if true (and ``adaptation`` is false), call
            ``model.set_forward_adv`` instead of ``model.set_forward``.

    Returns:
        Percentage of query samples whose arg-max score equals their class
        index within the episode.
    """
    # random.sample needs a sequence; dict views are rejected on Python >= 3.11.
    select_class = random.sample(list(cl_data_file.keys()), n_way)

    per_class = n_support + n_query
    z_all = []
    for cl in select_class:
        imgs = cl_data_file[cl]
        perm_ids = np.random.permutation(len(imgs)).tolist()
        z_all.append([imgs[perm_ids[i]].numpy() for i in range(per_class)])

    # Use the GPU when there is one; fall back to CPU so the function can
    # also run on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    z_all = torch.from_numpy(np.array(z_all)).to(device)

    model.n_query = n_query
    if adaptation:
        # NOTE(review): z_all is built from cl_data_file entries as-is; confirm
        # they are what set_forward_adaptation expects on this path.
        scores = model.set_forward_adaptation(z_all, is_feature=True)
    elif adv_eval:
        scores = model.set_forward_adv(z_all, is_feature=True)
    else:
        scores = model.set_forward(z_all, is_feature=True)

    pred = scores.detach().cpu().numpy().argmax(axis=1)
    y = np.repeat(range(n_way), n_query)
    return np.mean(pred == y) * 100
def init_loader_with_imgs(dataset):
    """Load every image of ``dataset`` and group the transformed images by class.

    Args:
        dataset: object exposing ``meta['image_names']`` (image paths),
            ``meta['image_labels']`` (raw labels), ``target_transform``
            (applied to each raw label) and ``transform`` (applied to each
            RGB image).

    Returns:
        Dict mapping each transformed label to the list of transformed images
        of that class, in dataset order.
    """
    image_names = dataset.meta['image_names']
    labels = [dataset.target_transform(raw) for raw in dataset.meta['image_labels']]

    cl_data_file = {cl: [] for cl in np.unique(np.array(labels)).tolist()}
    for name, label in zip(image_names, labels):
        # Open the file first, then convert the *image* to RGB; the context
        # manager releases the file handle once the pixels have been read.
        with Image.open(name) as img:
            cl_data_file[label].append(dataset.transform(img.convert('RGB')))

    return cl_data_file

if __name__ == '__main__':
    # Evaluation entry point: build the model selected on the command line,
    # restore its checkpoint, evaluate it on few-shot episodes and append the
    # accuracy to a results file.
    params = parse_args('test')

    acc_all = []

    iter_num = 1000  # number of evaluation episodes

    few_shot_params = dict(n_way=params.test_n_way, n_support=params.n_shot)
    if params.dataset in ['omniglot', 'cross_char']:
        assert params.model == 'Conv4' and not params.train_aug, 'omniglot only support Conv4 without augmentation'
        params.model = 'Conv4S'

    if params.method == 'baseline':
        model = BaselineFinetune(model_dict[params.model],dataset = params.dataset, **few_shot_params)
    elif params.method == 'baseline++':
        model = BaselineFinetune(
            model_dict[params.model], loss_type='dist',dataset = params.dataset, **few_shot_params)
    elif params.method == 'protonet':
        model = ProtoNet(model_dict[params.model], **few_shot_params)
    elif params.method == 'matchingnet':
        model = MatchingNet(model_dict[params.model], **few_shot_params)
    elif params.method in ['relationnet', 'relationnet_softmax']:
        if params.model == 'Conv4':
            feature_model = backbone.Conv4NP
        elif params.model == 'Conv6':
            feature_model = backbone.Conv6NP
        elif params.model == 'Conv4S':
            feature_model = backbone.Conv4SNP
        else:
            def feature_model(): return model_dict[params.model](flatten=False)
        loss_type = 'mse' if params.method == 'relationnet' else 'softmax'
        model = RelationNet(
            feature_model, loss_type=loss_type, **few_shot_params)
    elif params.method in ['maml', 'maml_approx']:
        backbone.ConvBlock.maml = True
        backbone.SimpleBlock.maml = True
        backbone.BottleneckBlock.maml = True
        backbone.ResNet.maml = True
        model = MAML(model_dict[params.model], approx=(
            params.method == 'maml_approx'), **few_shot_params)
        # maml use different parameter in omniglot
        if params.dataset in ['omniglot', 'cross_char']:
            model.n_task = 32
            model.task_update_num = 1
            model.train_lr = 0.1
    else:
        raise ValueError('Unknown method')

    model = model.cuda()

    if params.checkpoint_dir is None:
        checkpoint_dir = '%s/checkpoints/%s/%s_%s_adv' % (
            configs.save_dir, params.dataset, params.model, params.method)
        if params.train_aug:
            checkpoint_dir += '_aug'
        if not params.method in ['baseline', 'baseline++']:
            checkpoint_dir += '_%dway_%dshot' % (params.train_n_way, params.n_shot)
    else:
        checkpoint_dir = params.checkpoint_dir

    if params.save_iter != -1:
        modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
    else:
        modelfile = get_best_file(checkpoint_dir)
    if modelfile is None:
        # Fail with a clear message instead of letting torch.load(None) crash.
        raise FileNotFoundError('No checkpoint found in %s' % checkpoint_dir)

    tmp = torch.load(modelfile)
    if not params.method in ['baseline', 'baseline++']:
        model.load_state_dict(tmp['state'])

    # Keep only the 'wa_feature.*' weights and rename them to 'feature.*' so
    # that they load into the model's backbone.
    # NOTE(review): this filtered load also runs for the non-baseline methods
    # right after their full load above -- confirm that is intended.
    state = tmp['state']
    for key in list(state.keys()):
        if "wa_feature." in key:
            # Drop any 'module.' wrapper segment and map the prefix; keys with
            # 'module.' used to keep their 'wa_feature.' prefix.
            newkey = key.replace('module.', '').replace('wa_feature.', 'feature.')
            state[newkey] = state.pop(key)
        else:
            state.pop(key)
    model.load_state_dict(state)
    model.eval()


    split = params.split
    if params.save_iter != -1:
        split_str = split + "_" + str(params.save_iter)
    else:
        split_str = split

    # Pick the json file that lists the evaluation images.
    if params.dataset == 'cross':
        if split == 'base':
            loadfile = configs.data_dir['miniImagenet'] + 'all.json'
        else:
            loadfile = configs.data_dir['CUB'] + split + '.json'
    elif params.dataset == 'cross_char':
        if split == 'base':
            loadfile = configs.data_dir['omniglot'] + 'noLatin.json'
        else:
            loadfile = configs.data_dir['emnist'] + split + '.json'
    else:
        loadfile = configs.data_dir[params.dataset] + split + '.json'

    # maml do not support testing with feature
    if params.method in ['maml', 'maml_approx']:
        if 'Conv' in params.model:
            if params.dataset in ['omniglot', 'cross_char']:
                image_size = 28
            else:
                image_size = 84
        else:
            image_size = 224

        datamgr = SetDataManager(
            image_size, n_eposide=iter_num, n_query=15, **few_shot_params)

        novel_loader = datamgr.get_data_loader(loadfile, aug=False)
        if params.adaptation:
            # We perform adaptation on MAML simply by updating more times.
            model.task_update_num = 100
        model.eval()
        acc_mean, acc_std = model.test_loop(novel_loader, return_std=True)

    else:
        if params.dataset == 'miniImagenet':
            manager_cls = SimpleDataManager
        else:
            manager_cls = SimpleDataManagerWithoutNormalize

        datamgr = manager_cls(image_size = 84, batch_size=64)
        data_loader = datamgr.get_data_loader(loadfile, aug=False)

        trainfile = configs.data_dir[params.dataset] +  'base.json'
        base_datamgr = manager_cls(image_size = 84, batch_size=16)
        base_data_loader = base_datamgr.get_data_loader(trainfile, aug=False)
        # NOTE(review): get_base_means is defined on BaselineFinetune in this
        # file; confirm the other non-MAML models provide it as well.
        model.get_base_means(base_data_loader)

        # defaut split = novel, but you can also test base or val classes
        cl_data_file = init_loader_with_imgs(data_loader.dataset)
        for i in range(iter_num):
            if i%100==0:print(i)
            acc = feature_evaluation(
                cl_data_file, model, n_query=15, adaptation=params.adaptation, adv_eval= params.adv_eval,**few_shot_params)
            acc_all.append(acc)


        acc_all = np.asarray(acc_all)
        acc_mean = np.mean(acc_all)
        acc_std = np.std(acc_all)
        print('%d Test Acc = %4.2f%% +- %4.2f%%' %
              (iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num)))

    if params.dataset =='miniImagenet':
        results_file = './record/results.txt'
    elif params.dataset =='CUB':
        results_file = './record/cub_results.txt'
    else:
        # Other datasets used to leave results_file unbound, which crashed
        # here after the whole evaluation had already run.
        results_file = './record/%s_results.txt' % params.dataset
    os.makedirs(os.path.dirname(results_file), exist_ok=True)
    with open(results_file, 'a') as f:
        timestamp = time.strftime("%Y%m%d-%H%M%S", time.localtime())
        aug_str = '-aug' if params.train_aug else ''
        aug_str += '-adapted' if params.adaptation else ''
        if params.method in ['baseline', 'baseline++']:
            if not params.adv_eval:
                exp_setting = '%s-%s--%s-%s-%s%s %sshot %sway_test_wa_nn' % (
                    params.dataset, checkpoint_dir ,split_str, params.model, params.method, aug_str, params.n_shot, params.test_n_way)
            else:
                exp_setting = '%s-%s--%s-%s-%s%s %sshot %sway_test_wa_nn_adv' % (
                    params.dataset, checkpoint_dir ,split_str, params.model, params.method, aug_str, params.n_shot, params.test_n_way)
        else:
            exp_setting = '%s-%s-%s-%s%s %sshot %sway_train %sway_test' % (
                params.dataset, split_str, params.model, params.method, aug_str, params.n_shot, params.train_n_way, params.test_n_way)
        # Accuracy is reported with its 95% confidence half-width.
        acc_str = '%d Test Acc = %4.2f%% +- %4.2f%%' % (
            iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num))
        f.write('Time: %s, Setting: %s, Acc: %s \n' %
                (timestamp, exp_setting, acc_str))
