import torch
import numpy as np
from torch.autograd import Variable
import torch.nn as nn
import torch.optim
import json
import torch.utils.data.sampler
import os
import glob
import random
import time
import numpy as np
import configs
import backbone
import data.feature_loader as feat_loader
from data.datamgr import SetDataManager, SimpleDataManager
from methods.baselinetrain import BaselineTrain
# from methods.baselinefinetune import BaselineFinetune
from methods.protonet import ProtoNet
from methods.matchingnet import MatchingNet
from methods.relationnet import RelationNet
from methods.maml import MAML
from io_utils import model_dict, parse_args, get_resume_file, get_best_file, get_assigned_file
import pdb
from methods.meta_template import MetaTemplate
from PIL import Image
from torchvision import utils
from cifar_fs import *
from torch.utils.data import DataLoader

class BaselineFinetune(MetaTemplate):
    """Baseline / Baseline++ episode evaluator with an optional PGD attack.

    For every episode a fresh classifier head (``nn.Linear`` for
    ``loss_type='softmax'``, ``backbone.distLinear`` for ``loss_type='dist'``)
    is fitted on the support features and then applied to the queries.
    The adversarial variants additionally perturb the query images with an
    L-infinity projected-gradient attack before scoring them.
    """

    # Flattened feature width per supported backbone, keyed by
    # ``model_func.__name__``.
    _FEAT_DIMS = {
        'R2D2': 8192,
        'ResNet12': 2560,
        'Conv4': 256,
        'C4': 512,
        'ResNet18': 512,
        'ResNet18_pool': 512,
    }

    def __init__(self, model_func, n_way, n_support, loss_type="softmax"):
        """Build the evaluator.

        Args:
            model_func: backbone constructor; its ``__name__`` selects the
                feature dimension fed to the classifier head.
            n_way: number of classes per episode.
            n_support: number of support samples per class.
            loss_type: ``'softmax'`` or ``'dist'`` (classifier head type).

        Raises:
            NotImplementedError: if the backbone name has no known
                feature dimension.
        """
        super(BaselineFinetune, self).__init__(model_func, n_way, n_support)
        self.loss_type = loss_type

        # PGD attack settings: number of steps, per-step size and
        # L-infinity radius (all in pixel units of a [0, 1] image).
        self.k = 20
        self.a = 2.0 / 255
        self.epsilon = 8.0 / 255

        feat_dim = self._FEAT_DIMS.get(model_func.__name__)
        if feat_dim is None:
            raise NotImplementedError(
                'no feature dimension registered for backbone %r'
                % model_func.__name__)
        self.feat_dim = feat_dim

    def _embed_episode(self, x):
        """Run the backbone on an episode tensor.

        ``x`` is indexed as (n_way, samples_per_class, C, H, W); the result
        is reshaped to (n_way, samples_per_class, feat).
        """
        n_way = x.size(0)
        flat = x.contiguous().view(
            x.size(0) * x.size(1), x.size(2), x.size(3), x.size(4))
        features = self.forward(flat)
        return features.view(n_way, features.size(0) // n_way, features.size(1))

    def _split_support_query(self, features):
        """Split episode features into flattened support and query matrices."""
        # NOTE(review): parse_feature is defined in MetaTemplate; it is
        # presumably splitting along dim 1 at n_support -- confirm there.
        z_support, z_query = self.parse_feature(features, is_feature=True)
        z_support = z_support.contiguous().view(self.n_way * self.n_support, -1)
        z_query = z_query.contiguous().view(self.n_way * self.n_query, -1)
        return z_support, z_query

    def _fit_linear_clf(self, z_support):
        """Fit a fresh classifier head on the support features.

        The trained head is stored in ``self.linear_clf``.

        Raises:
            ValueError: if ``self.loss_type`` is neither ``'softmax'`` nor
                ``'dist'``.
        """
        y_support = torch.from_numpy(
            np.repeat(range(self.n_way), self.n_support)).cuda()

        if self.loss_type == 'softmax':
            linear_clf = nn.Linear(self.feat_dim, self.n_way)
        elif self.loss_type == 'dist':
            linear_clf = backbone.distLinear(self.feat_dim, self.n_way)
        else:
            # Previously this fell through and failed with UnboundLocalError.
            raise ValueError('Unknown loss_type: %r' % (self.loss_type,))
        self.linear_clf = linear_clf.cuda()

        set_optimizer = torch.optim.SGD(
            self.linear_clf.parameters(), lr=0.01, momentum=0.9,
            dampening=0.9, weight_decay=0.001)
        loss_function = nn.CrossEntropyLoss().cuda()

        batch_size = 4
        support_size = self.n_way * self.n_support
        for _ in range(100):
            rand_id = np.random.permutation(support_size)
            for start in range(0, support_size, batch_size):
                set_optimizer.zero_grad()
                selected_id = torch.from_numpy(
                    rand_id[start: min(start + batch_size, support_size)]).cuda()
                scores = self.linear_clf(z_support[selected_id])
                loss = loss_function(scores, y_support[selected_id])
                loss.backward()
                set_optimizer.step()

    def set_forward(self, x, is_feature=True):
        """Embed the episode images and return clean query scores.

        ``is_feature`` is accepted for interface compatibility and passed on.
        """
        # Baseline always do adaptation
        features = self._embed_episode(x)
        return self.set_forward_adaptation(features, is_feature)

    def set_forward_adv(self, x_orig, is_feature=True):
        """Embed the episode images and return scores on PGD-attacked queries."""
        # Baseline always do adaptation
        features = self._embed_episode(x_orig)
        return self.set_forward_adaptation_adv(features, x_orig, is_feature)

    def set_forward_adaptation(self, x, is_feature=True):
        """Fit the classifier head on support features and score the queries.

        Args:
            x: episode features, (n_way, n_support + n_query, feat).
            is_feature: ignored; the input is always treated as features.

        Returns:
            Query scores from the fitted head, one row per query sample.
        """
        z_support, z_query = self._split_support_query(x)
        self._fit_linear_clf(z_support)
        return self.linear_clf(z_query)

    def set_forward_adaptation_adv(self, feature, x_orig, is_feature=True):
        """Fit the head on clean support features, then attack the queries.

        Args:
            feature: episode features, (n_way, n_support + n_query, feat).
            x_orig: the episode images the features were computed from,
                indexed as (n_way, n_support + n_query, C, H, W).
            is_feature: ignored; ``feature`` is always treated as features.

        Returns:
            Scores of the fitted head on the final adversarial query images.
        """
        z_support, _ = self._split_support_query(feature)
        self._fit_linear_clf(z_support)

        loss_function = nn.CrossEntropyLoss().cuda()
        y = torch.from_numpy(np.repeat(range(self.n_way), self.n_query)).cuda()

        self.eval()
        self.linear_clf.eval()

        # Only the query images are attacked.
        queries = x_orig[:, self.n_support:]
        queries = queries.contiguous().view(
            queries.size(0) * queries.size(1),
            queries.size(2), queries.size(3), queries.size(4))
        x_nat = queries.detach().clone().cuda()

        # Random start inside the epsilon ball (noise drawn on the CPU).
        x = x_nat + torch.FloatTensor(x_nat.shape).uniform_(
            -self.epsilon, self.epsilon).cuda()

        for _ in range(self.k):
            x.requires_grad = True
            scores = self.linear_clf(self.forward(x))
            loss = loss_function(scores, y)
            grad = torch.autograd.grad(loss, x)[0].detach()
            # Ascent step, then project back onto the epsilon ball around
            # the clean image and onto the valid pixel range.
            x = x.detach() + self.a * torch.sign(grad)
            x = torch.max(torch.min(x, x_nat + self.epsilon),
                          x_nat - self.epsilon)
            x = torch.clamp(x, 0, 1)  # ensure valid pixel range
            torch.cuda.empty_cache()

        # Score the FINAL adversarial input. Reusing the scores from inside
        # the loop would ignore the last update (and be undefined for k == 0).
        with torch.no_grad():
            adv_scores = self.linear_clf(self.forward(x))
        return adv_scores

    def set_forward_loss(self, x):
        """Not supported: this class never finetunes the backbone."""
        raise ValueError(
            'Baseline predict on pretrained feature and do not support finetune backbone')




def feature_evaluation(cl_data_file, model, n_way=5, n_support=5, n_query=15, adaptation=False,adv_eval = False):
    """Sample one few-shot episode and return the model's query accuracy.

    Args:
        cl_data_file: mapping from class label to a list of per-sample
            objects exposing ``.numpy()`` (e.g. tensors).
        model: object providing ``set_forward``, ``set_forward_adv`` and
            ``set_forward_adaptation``; its ``n_query`` attribute is set here.
        n_way: number of classes drawn for the episode.
        n_support: support samples per class.
        n_query: query samples per class.
        adaptation: if true, call ``model.set_forward_adaptation``.
        adv_eval: if true (and ``adaptation`` is false), call
            ``model.set_forward_adv`` instead of ``model.set_forward``.

    Returns:
        Accuracy on the episode's queries, in percent.
    """
    # random.sample requires a real sequence; passing a dict view raises
    # TypeError on Python >= 3.11.
    class_list = list(cl_data_file)
    select_class = random.sample(class_list, n_way)

    per_class = n_support + n_query
    z_all = []
    for cl in select_class:
        imgs = cl_data_file[cl]
        perm_ids = np.random.permutation(len(imgs)).tolist()
        # The first n_support entries act as support, the rest as queries.
        z_all.append([imgs[perm_ids[i]].numpy() for i in range(per_class)])

    z_all = torch.from_numpy(np.array(z_all)).cuda()
    model.n_query = n_query

    if adaptation:
        scores = model.set_forward_adaptation(z_all, is_feature=True)
    elif adv_eval:
        scores = model.set_forward_adv(z_all, is_feature=True)
    else:
        scores = model.set_forward(z_all, is_feature=True)

    pred = scores.detach().cpu().numpy().argmax(axis=1)
    # Queries are laid out class by class, n_query per class.
    y = np.repeat(range(n_way), n_query)
    acc = np.mean(pred == y) * 100
    return acc
def init_loader_with_imgs(dataset):
    """Group a dataset's transformed images by class label.

    Reads ``dataset.data`` and ``dataset.labels``, converts each image with
    ``Image.fromarray`` and applies ``dataset.transform`` to it.

    Returns:
        dict mapping each distinct label (in ``np.unique`` order) to the list
        of transformed images carrying that label, in dataset order.
    """
    raw_images = dataset.data
    targets = dataset.labels

    # One empty bucket per distinct label, created up front so the key order
    # follows np.unique rather than first occurrence.
    buckets = {cls: [] for cls in np.unique(np.array(targets)).tolist()}

    for idx in range(len(targets)):
        bucket = buckets[targets[idx]]
        pil_img = Image.fromarray(raw_images[idx])
        bucket.append(dataset.transform(pil_img))

    return buckets

if __name__ == '__main__':
    # Few-shot evaluation driver: build the requested method, load its
    # checkpoint, run `iter_num` random episodes on the CIFAR-FS test split
    # and append the mean accuracy (with a 95% interval) to a results file.
    params = parse_args('test')

    acc_all = []

    iter_num = 1000  # number of evaluation episodes

    few_shot_params = dict(n_way=params.test_n_way, n_support=params.n_shot)
    if params.dataset in ['omniglot', 'cross_char']:
        assert params.model == 'Conv4' and not params.train_aug, 'omniglot only support Conv4 without augmentation'
        params.model = 'Conv4S'

    if params.method == 'baseline':
        model = BaselineFinetune(model_dict[params.model], **few_shot_params)
    elif params.method == 'baseline++':
        model = BaselineFinetune(
            model_dict[params.model], loss_type='dist', **few_shot_params)
    elif params.method == 'protonet':
        model = ProtoNet(model_dict[params.model], **few_shot_params)
    elif params.method == 'matchingnet':
        model = MatchingNet(model_dict[params.model], **few_shot_params)
    elif params.method in ['relationnet', 'relationnet_softmax']:
        if params.model == 'Conv4':
            feature_model = backbone.Conv4NP
        elif params.model == 'Conv6':
            feature_model = backbone.Conv6NP
        elif params.model == 'Conv4S':
            feature_model = backbone.Conv4SNP
        else:
            def feature_model():
                return model_dict[params.model](flatten=False)
        loss_type = 'mse' if params.method == 'relationnet' else 'softmax'
        model = RelationNet(
            feature_model, loss_type=loss_type, **few_shot_params)
    elif params.method in ['maml', 'maml_approx']:
        backbone.ConvBlock.maml = True
        backbone.SimpleBlock.maml = True
        backbone.BottleneckBlock.maml = True
        backbone.ResNet.maml = True
        model = MAML(model_dict[params.model], approx=(
            params.method == 'maml_approx'), **few_shot_params)
        # maml use different parameter in omniglot
        if params.dataset in ['omniglot', 'cross_char']:
            model.n_task = 32
            model.task_update_num = 1
            model.train_lr = 0.1
    else:
        raise ValueError('Unknown method')

    model = model.cuda()

    is_baseline = params.method in ['baseline', 'baseline++']

    if params.checkpoint_dir is None:
        checkpoint_dir = '%s/checkpoints/%s/%s_%s_adv' % (
            configs.save_dir, params.dataset, params.model, params.method)
        if params.train_aug:
            checkpoint_dir += '_aug'
        if not is_baseline:
            checkpoint_dir += '_%dway_%dshot' % (params.train_n_way, params.n_shot)
    else:
        checkpoint_dir = params.checkpoint_dir

    if params.save_iter != -1:
        modelfile = get_assigned_file(checkpoint_dir, params.save_iter)
    else:
        modelfile = get_best_file(checkpoint_dir)
    if modelfile is None:
        # Fail with a clear message instead of crashing inside torch.load(None).
        raise FileNotFoundError(
            'No checkpoint found in %s' % checkpoint_dir)

    # Load the checkpoint once and reuse it for both loading steps below.
    tmp = torch.load(modelfile)
    if not is_baseline:
        model.load_state_dict(tmp['state'])

    # Keep only the 'wa_feature.' weights and rename them so they load into
    # the model's 'feature.' backbone. Any 'module.' component is stripped
    # as well. A new dict is built so a renamed entry can never be popped by
    # a later, identically named original key.
    state = {}
    for key, value in tmp['state'].items():
        if 'wa_feature.' not in key:
            continue
        newkey = key.replace('module.', '').replace('wa_feature.', 'feature.')
        state[newkey] = value
    model.load_state_dict(state)
    model.eval()

    split = params.split
    if params.save_iter != -1:
        split_str = split + "_" + str(params.save_iter)
    else:
        split_str = split

    # Only the underlying dataset is used; episodes are sampled manually.
    data_loader = DataLoader(CIFAR_FS(phase='test'),batch_size=64,shuffle=True,pin_memory=True,num_workers=4)
    cl_data_file = init_loader_with_imgs(data_loader.dataset)
    for i in range(iter_num):
        acc = feature_evaluation(
            cl_data_file, model, n_query=15, adaptation=params.adaptation, adv_eval= params.adv_eval,**few_shot_params)
        acc_all.append(acc)
        if i%100==0:print(i)

    acc_all = np.asarray(acc_all)
    acc_mean = np.mean(acc_all)
    acc_std = np.std(acc_all)
    # Mean accuracy with a 95% confidence half-width (1.96 * standard error).
    acc_str = '%d Test Acc = %4.2f%% +- %4.2f%%' % (
        iter_num, acc_mean, 1.96 * acc_std / np.sqrt(iter_num))
    print(acc_str)

    # Make sure the output directory exists so the results of a long run
    # are not lost to a FileNotFoundError at the very end.
    results_path = './record/cifar_results.txt'
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    with open(results_path, 'a') as f:
        timestamp = time.strftime("%Y%m%d-%H%M%S", time.localtime())
        aug_str = '-aug' if params.train_aug else ''
        aug_str += '-adapted' if params.adaptation else ''
        if is_baseline:
            if not params.adv_eval:
                setting_fmt = '%s-%s--%s-%s-%s%s %sshot %sway_test'
            else:
                setting_fmt = '%s-%s--%s-%s-%s%s %sshot %sway_test_adv'
            exp_setting = setting_fmt % (
                params.dataset, checkpoint_dir, split_str, params.model,
                params.method, aug_str, params.n_shot, params.test_n_way)
        else:
            exp_setting = '%s-%s-%s-%s%s %sshot %sway_train %sway_test' % (
                params.dataset, split_str, params.model, params.method, aug_str, params.n_shot, params.train_n_way, params.test_n_way)
        f.write('Time: %s, Setting: %s, Acc: %s \n' %
                (timestamp, exp_setting, acc_str))
