import torch
import tqdm
import numpy as np
from torch.autograd import Variable
import torchvision.models as models
import torch.nn as nn
import random
from lib.util.mytoolbag import cal_para, get_gradient_tensor, multi_tensor_gra
from lib.dataset.mydata import CifarData, Cifar100
from torch.utils.data import DataLoader
from lib.model.densenet import DenseNet121 as Net
import torch.optim as optim
from lib.util.mytoolbag import setup_seed
from lib.model.effv2 import Effnet
import time
from lib.model.densenet import densenet_cifar
import argparse
from lib.model.effnet import Eff_s
from lib.dataset.imagenet import ImageNet
from lib.util.logger import Logger
from transformers import ViTFeatureExtractor, ViTModel
from lib.model.vit import Vit

# Classification loss shared by the training and evaluation loops in train_net.
criterion = nn.CrossEntropyLoss()
# NOTE(review): SIZE is not referenced in the visible part of this file —
# confirm whether it is still needed before relying on it.
SIZE = 30


def train_net(train_loader, net, optimizer, testloader, rd=50, scheduler=None, logger=None):
    """Train ``net`` for ``rd`` epochs, evaluating on ``testloader`` after each epoch.

    Args:
        train_loader: Iterable of ``(inputs, labels)`` training batches. Must
            support ``len()``; its ``dataset`` attribute is read when ``logger``
            is given.
        net: Model to optimise. Every batch is moved with ``.cuda()``, so the
            model has to live on the GPU as well.
        optimizer: Optimizer; stepped once per training batch.
        testloader: Iterable of ``(images, labels)`` evaluation batches. Must
            support ``len()`` and expose ``dataset`` when ``logger`` is given.
        rd: Number of epochs to run.
        scheduler: Optional learning-rate scheduler. ``step()`` is called once
            per *epoch* (not per batch).
        logger: Optional object exposing ``epoch_log2(epoch, train_acc_pct,
            train_loss, test_acc_pct, test_loss)``.

    Returns:
        Tuple ``(best_train_correct, best_test_correct)``: the highest number of
        correctly classified training / test samples seen in any single epoch.
        These are raw counts, not percentages.
    """
    best_test_correct = 0
    best_train_correct = 0
    for epoch in range(1, rd + 1):
        started = time.time()
        train_correct, train_loss, test_loss = 0, 0.0, 0.0

        net.train()
        # The context manager closes the bar at the end of the epoch so that
        # bars do not pile up and interleave with the summary prints below.
        with tqdm.tqdm(total=len(train_loader)) as p_bar:
            for inputs, labels in train_loader:
                inputs, labels = inputs.cuda(), labels.cuda()
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                predicted = torch.max(outputs, 1)[1]
                train_correct += (predicted == labels).sum().item()
                train_loss += float(loss)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                p_bar.update(1)

        test_correct = 0
        net.eval()
        # No gradients are needed for evaluation; disabling autograd avoids
        # building (and holding on to) a graph for every test batch.
        with torch.no_grad():
            for images, labels in testloader:
                images, labels = images.cuda(), labels.cuda()
                outputs = net(images)
                test_loss += float(criterion(outputs, labels))
                predicted = torch.max(outputs, 1)[1]
                test_correct += (predicted == labels).sum().item()

        best_test_correct = max(best_test_correct, test_correct)
        print('epoch : %d  ' % epoch, end='')
        print('acc : %.1f ' % test_correct, end='')
        print(time.time() - started)
        if logger:
            # Accuracies are logged as percentages, losses as per-batch means.
            logger.epoch_log2(epoch,
                              train_correct / len(train_loader.dataset) * 100,
                              train_loss / len(train_loader),
                              test_correct / len(testloader.dataset) * 100,
                              test_loss / len(testloader))
        best_train_correct = max(best_train_correct, train_correct)
        if scheduler:
            scheduler.step()
    print(best_test_correct)
    return best_train_correct, best_test_correct


def round1(i, now_set, data, rd=20, args=None, logger=None):
    """Run one full training round with a freshly initialised ``Effnet``.

    Args:
        i: Round index; passed to ``setup_seed`` before the model is built.
        now_set: Training data loader handed straight to ``train_net``.
        data: Dataset wrapper providing ``train_loader(...)`` and ``test_set``.
        rd: Number of epochs to train for.
        args: Parsed CLI arguments; ``args.b`` is used as the test batch size.
        logger: Optional per-epoch logger forwarded to ``train_net``.

    Returns:
        Whatever ``train_net`` returns: ``(best_train_correct, best_test_correct)``.
    """
    setup_seed(i)
    net = Effnet(num_cls=10).cuda()
    test_data = data.train_loader(data_set=data.test_set, batch=args.b)
    optimizer = optim.AdamW(net.parameters(), lr=0.001, weight_decay=0.005)
    # train_net calls scheduler.step() once per epoch, so the one-cycle schedule
    # has to span exactly ``rd`` steps. The previous configuration
    # (epochs=20, steps_per_epoch=50000 // batch) described tens of thousands of
    # steps, which left the learning rate stuck at the very start of warm-up.
    # OneCycleLR rejects a non-positive step count, so skip it when rd <= 0.
    scheduler = None
    if rd > 0:
        scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, total_steps=rd)

    return train_net(now_set, net, optimizer, test_data, rd=rd, scheduler=scheduler, logger=logger)


def main():
    """Parse CLI options, run ``-r`` training rounds and log aggregate accuracy.

    Options:
        -b: batch size (default 32).
        -r: number of independent rounds (default 1).

    After every round the running mean / standard deviation of the test and
    train accuracies is printed; a final summary line is written to the
    ``base_result`` logger.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-b', default=32, type=int)
    cli.add_argument('-r', default=1, type=int)
    args = cli.parse_args()

    data = CifarData(size=384)
    epoch_logger = Logger(name='eff-cifar10-base')
    summary_logger = Logger(name='base_result', tim=False)

    test_accs = []
    train_accs = []
    for run_idx in range(args.r):
        loader = data.train_loader(batch=args.b)
        best_train, best_test = round1(run_idx, loader, data=data, args=args, rd=40, logger=epoch_logger)
        # NOTE(review): dividing by 100 turns a correct-count into a percentage
        # only if the test set holds 10,000 samples — confirm against CifarData.
        test_pct = best_test / 100
        train_pct = best_train / (len(loader.dataset) / 100)
        test_accs.append(test_pct)
        train_accs.append(train_pct)
        print('test acc: ', sum(test_accs) / len(test_accs), np.std(test_accs),
              ' | train acc: ', np.mean(train_accs), np.std(train_accs))

    test_mean = str(round(np.mean(test_accs), 2))
    test_std = str(round(np.std(test_accs), 3))
    train_mean = str(round(np.mean(train_accs), 2))
    train_std = str(round(np.std(train_accs), 3))
    summary = ''.join([
        'eff-cifar10',
        ' |test acc: ', test_mean, '+', test_std,
        ' |train acc: ', train_mean, '+', train_std,
        '\n',
    ])
    summary_logger.info(summary)
    summary_logger.info('----------------------------------------------------------------------------------')


# Run the experiment only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

"""
srun -p NLP --gres=gpu:1 --quotatype=spot -N1 python -u 9efftest.py
 
 > vit100.log 2>&1 &


CUDA_VISIBLE_DEVICES=0 nohup python -u 1train.py -m tr -p o1.txt > o1o.out 2>&1 &
CUDA_VISIBLE_DEVICES=0 nohup python 1train.py -m rnd -p o2.txt > o2.out 2>&1 &
CUDA_VISIBLE_DEVICES=3 nohup python -u 1train.py -p o1.txt > o500.out 2>&1 &

"""