import os
import csv
import torch
import torch.optim as optim
from absl import flags, app
from torchvision import datasets, transforms
from torchvision.utils import make_grid, save_image
from tensorboardX import SummaryWriter
from tqdm import trange
import time
import source.models.wgangp as models
import source.losses as losses
from source.utils import generate_imgs, infiniteloop, set_seed
from metrics.score.both import get_inception_score_and_fid
from source.AA import pytorchAA

# Generator constructors, keyed by the value of the --arch flag.
net_G_models = {
    'res32': models.ResGenerator32,
    'res64': models.ResGenerator64,
    'cnn32': models.Generator32,
    'cnn64': models.Generator64,
}

# Discriminator constructors. The keys mirror net_G_models, so one --arch
# value selects both networks.
net_D_models = {
    'res32': models.ResDiscriminator32,
    'res64': models.ResDiscriminator64,
    'cnn32': models.Discriminator32,
    'cnn64': models.Discriminator64,
}

# Loss constructors, keyed by the value of the --loss flag.
loss_fns = {
    'bce': losses.BCEWithLogits,
    'hinge': losses.Hinge,
    'was': losses.Wasserstein,
    'softplus': losses.Softplus
}

FLAGS = flags.FLAGS
# Model and training flags.
flags.DEFINE_enum('dataset', 'celeba', ['cifar10', 'celeba'], "dataset")
flags.DEFINE_enum('arch', 'cnn64', net_G_models.keys(), "architecture")
flags.DEFINE_integer('total_steps', 100000, "total number of training steps")
flags.DEFINE_integer('batch_size', 64, "batch size")
flags.DEFINE_float('lr_G', 2e-4, "Generator learning rate")
flags.DEFINE_float('lr_D', 2e-4, "Discriminator learning rate")
flags.DEFINE_multi_float('betas', [0.0, 0.9], "for Adam")
# n_dis is the number of discriminator updates run before each generator
# update in train().
flags.DEFINE_integer('n_dis', 5, "update Generator every this steps")
flags.DEFINE_integer('z_dim', 128, "latent space dimension")
# alpha multiplies the gradient-penalty term in the discriminator loss.
flags.DEFINE_float('alpha', 10, "gradient penalty")
flags.DEFINE_enum('loss', 'was', loss_fns.keys(), "loss function")
flags.DEFINE_integer('seed', 0, "random seed")
# Logging and evaluation flags.
flags.DEFINE_integer('eval_step', 1000, "evaluate FID and Inception Score")
flags.DEFINE_integer('sample_step', 500, "sample image every this steps")
flags.DEFINE_integer('sample_size', 64, "sampling size of images")
# NOTE(review): the default logdir names CIFAR10 while the default dataset
# is celeba -- confirm which default is intended.
flags.DEFINE_string('logdir', './logs/WGANGP_CIFAR10_RES', 'logging folder')
flags.DEFINE_bool('record', True, "record inception score and FID score")
flags.DEFINE_string('fid_cache', './stats/celeba_stats.npz', 'FID cache')
# Flags used only in generation mode (--generate).
flags.DEFINE_bool('generate', False, 'generate images')
flags.DEFINE_string('pretrain', None, 'path to test model')
flags.DEFINE_string('output', './outputs', 'path to output dir')
flags.DEFINE_integer('num_images', 50000, 'the number of generated images')

# Prefer the first CUDA device, but fall back to the CPU so the script still
# runs (slowly) on machines without a usable GPU instead of failing on the
# first `.to(device)` call.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        
def combine(net_G, net_D):
    """Flatten the state of both networks into a single 1-D tensor.

    Every entry of ``net_G.state_dict()`` is flattened and laid out in
    iteration order, followed by every entry of ``net_D.state_dict()``.

    Args:
        net_G: module whose state_dict entries come first.
        net_D: module whose state_dict entries come second.

    Returns:
        The 1-D tensor obtained by concatenating all flattened entries.
    """
    # reshape(-1) rather than view(-1): view raises on non-contiguous
    # tensors (e.g. transposed storage), while reshape copies when it must.
    # torch.cat copies the data anyway, so nothing is lost.
    flat_entries = [
        tensor.reshape(-1)
        for net in (net_G, net_D)
        for tensor in net.state_dict().values()
    ]
    return torch.cat(flat_entries)

def generate():
    """Sample images from a saved generator and write them as PNG files.

    Builds the generator selected by ``FLAGS.arch``, restores the ``'net_G'``
    entry of the checkpoint at ``FLAGS.pretrain``, and writes
    ``FLAGS.num_images`` images named ``0.png``, ``1.png``, ... into
    ``FLAGS.output``, generating them in batches of ``FLAGS.batch_size``.

    Raises:
        ValueError: if ``--pretrain`` was not given.
    """
    # Explicit raise instead of assert, so the check survives `python -O`.
    if FLAGS.pretrain is None:
        raise ValueError("set model weight by --pretrain [model]")

    net_G = net_G_models[FLAGS.arch](FLAGS.z_dim).to(device)
    # map_location lets a checkpoint saved on a different device load here.
    checkpoint = torch.load(FLAGS.pretrain, map_location=device)
    net_G.load_state_dict(checkpoint['net_G'])
    net_G.eval()

    # exist_ok avoids a crash when the output directory is already present
    # (e.g. on a re-run). Files with the same index are overwritten.
    os.makedirs(FLAGS.output, exist_ok=True)

    counter = 0
    with torch.no_grad():
        for start in trange(
                0, FLAGS.num_images, FLAGS.batch_size, dynamic_ncols=True):
            # The last batch may be smaller than FLAGS.batch_size.
            batch_size = min(FLAGS.batch_size, FLAGS.num_images - start)
            z = torch.randn(batch_size, FLAGS.z_dim).to(device)
            x = net_G(z).cpu()
            # Affine shift by +1 and scale by 1/2; presumably maps generator
            # outputs from [-1, 1] to [0, 1] for saving -- TODO confirm the
            # generator's output range.
            x = (x + 1) / 2
            for image in x:
                save_image(
                    image, os.path.join(FLAGS.output, '%d.png' % counter))
                counter += 1


def cacl_gradient_penalty(net_D, real, fake):
    """Return the gradient penalty of ``net_D`` on random real/fake blends.

    One uniform mixing weight is drawn per sample and used to interpolate
    between ``real`` and ``fake``. The penalty is the batch mean of
    ``(||grad||_2 - 1) ** 2``, where ``grad`` is the gradient of the
    discriminator output with respect to the interpolated input, flattened
    per sample.

    Args:
        net_D: callable mapping a batch of inputs to discriminator outputs.
        real: batch of real samples; the mixing weight has shape
            ``(batch, 1, 1, 1)`` and is expanded to ``real``'s size.
        fake: batch of generated samples, combined element-wise with
            ``real``.

    Returns:
        A scalar tensor that stays attached to the autograd graph, so it
        can be back-propagated through.
    """
    mix = torch.rand(real.size(0), 1, 1, 1).to(real.device).expand_as(real)

    blended = mix * real + (1 - mix) * fake
    blended.requires_grad_(True)

    scores = net_D(blended)
    # create_graph keeps the gradient itself differentiable, so the penalty
    # can be minimised with respect to the discriminator's parameters.
    (grad,) = torch.autograd.grad(
        outputs=scores, inputs=blended,
        grad_outputs=torch.ones_like(scores),
        create_graph=True, retain_graph=True)

    per_sample_norm = torch.norm(grad.flatten(start_dim=1), dim=1)
    return ((per_sample_norm - 1) ** 2).mean()


def train():
    """Train the generator/discriminator pair selected by the flags.

    Each step runs ``FLAGS.n_dis`` discriminator updates (loss plus
    ``FLAGS.alpha`` times the gradient penalty) and then one generator
    update. After the generator update, the flattened state of both networks
    is passed, together with the previous flattened state, to
    ``pytorchAA.apply`` (presumably Anderson acceleration -- confirm against
    ``source.AA``) and the result is written back into the networks.

    Side effects:
        * creates ``FLAGS.logdir/sample`` (fails if it already exists) and
          writes TensorBoard events, ``flagfile.txt``, sample grids and
          ``model.pt`` under ``FLAGS.logdir``;
        * writes ``wgangp_celeba_results.csv`` in the working directory with
          one ``(step, IS, FID, elapsed_seconds)`` row per evaluation.
    """
    def divide(fp, length_g):
        """Write the flat vector ``fp`` back into both networks.

        The first ``length_g`` entries are reshaped into net_G's state
        entries, and the remainder into net_D's, using the shapes recorded
        in ``sizeG`` / ``sizeD``.
        """
        vG, vD = fp[0:length_g], fp[length_g:]
        targets = (
            (vG, sizeG, G_dict, net_G),
            (vD, sizeD, D_dict, net_D),
        )
        for flat, shapes, state, net in targets:
            offset = 0
            for k, v in shapes.items():
                state[k].data.copy_(flat[offset: offset + v.numel()].view(v))
                offset = offset + v.numel()
            # NOTE(review): the state_dict tensors presumably alias the live
            # parameters, which would make this reload redundant; it is kept
            # to preserve the original behaviour.
            net.load_state_dict(state)

    if FLAGS.dataset == 'cifar10':
        dataset = datasets.CIFAR10(
            './data', train=True, download=True,
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ]))
    elif FLAGS.dataset == 'celeba':
        root = './data'
        # Crop a 128x128 window before resizing to 64x64.
        h_start = 50
        h_end = 128 + 50
        w_start = 25
        w_end = 128 + 25
        crop_trans = lambda x: x[:, h_start:h_end, w_start:w_end]

        # x: [0, 1] -> [-1, 1]
        linear_trans = lambda x: 2*x - 1

        dataset = datasets.ImageFolder(
            root=root,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(crop_trans),
                transforms.ToPILImage(),
                transforms.Resize((64, 64)),
                transforms.ToTensor(),
                transforms.Lambda(linear_trans),
            ]))

    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=FLAGS.batch_size, shuffle=True, num_workers=4,
        drop_last=True)

    net_G = net_G_models[FLAGS.arch](FLAGS.z_dim).to(device)
    net_D = net_D_models[FLAGS.arch]().to(device)
    loss_fn = loss_fns[FLAGS.loss]()

    optim_G = optim.Adam(net_G.parameters(), lr=FLAGS.lr_G, betas=FLAGS.betas)
    optim_D = optim.Adam(net_D.parameters(), lr=FLAGS.lr_D, betas=FLAGS.betas)
    # Learning-rate multiplier decays linearly from 1 to 0 over total_steps.
    sched_G = optim.lr_scheduler.LambdaLR(
        optim_G, lambda step: 1 - step / FLAGS.total_steps)
    sched_D = optim.lr_scheduler.LambdaLR(
        optim_D, lambda step: 1 - step / FLAGS.total_steps)

    # Bookkeeping for converting between the networks and one flat vector.
    G_dict = dict(net_G.state_dict())
    D_dict = dict(net_D.state_dict())
    sizeG = {key: tensor.shape for key, tensor in G_dict.items()}
    sizeD = {key: tensor.shape for key, tensor in D_dict.items()}
    fpprev = combine(net_G, net_D)
    sum_G = sum(tensor.numel() for tensor in G_dict.values())
    sum_D = sum(tensor.numel() for tensor in D_dict.values())
    sumGD = sum_G + sum_D
    # Internal sanity check: the flat vector must cover every state entry.
    assert len(fpprev) == sumGD
    aa_wrk = pytorchAA(sumGD, 150, type2=True, reg=1)

    os.makedirs(os.path.join(FLAGS.logdir, 'sample'))
    writer = SummaryWriter(os.path.join(FLAGS.logdir))
    # try/finally so the event writer is closed even if training is
    # interrupted or raises.
    try:
        sample_z = torch.randn(FLAGS.sample_size, FLAGS.z_dim).to(device)
        with open(os.path.join(FLAGS.logdir, "flagfile.txt"), 'w') as f:
            f.write(FLAGS.flags_into_string())
        writer.add_text(
            "flagfile", FLAGS.flags_into_string().replace('\n', '  \n'))

        real, _ = next(iter(dataloader))
        grid = (make_grid(real[:FLAGS.sample_size]) + 1) / 2
        writer.add_image('real_sample', grid)

        looper = infiniteloop(dataloader)
        start = time.time()
        results_path = 'wgangp_celeba_results.csv'
        # The results file is managed by the `with`, so it is always closed;
        # newline='' is what the csv module expects for files it writes.
        with open(results_path, 'w', newline='') as inception_f, \
                trange(1, FLAGS.total_steps + 1,
                       desc='Training', ncols=0) as pbar:
            inception_writter = csv.writer(inception_f)

            # Iterating `pbar` already advances the bar once per step, so no
            # manual pbar.update() is issued (it would double-count).
            for step in pbar:
                # Discriminator
                for _ in range(FLAGS.n_dis):
                    with torch.no_grad():
                        z = torch.randn(
                            FLAGS.batch_size, FLAGS.z_dim).to(device)
                        fake = net_G(z).detach()
                    real = next(looper).to(device)
                    net_D_real = net_D(real)
                    net_D_fake = net_D(fake)
                    loss = loss_fn(net_D_real, net_D_fake)
                    loss_gp = cacl_gradient_penalty(net_D, real, fake)
                    loss_all = loss + FLAGS.alpha * loss_gp

                    optim_D.zero_grad()
                    loss_all.backward()
                    optim_D.step()

                    # Sign flip only affects the value that is displayed and
                    # logged for the 'was' loss, not the optimisation.
                    if FLAGS.loss == 'was':
                        loss = -loss
                    pbar.set_postfix(loss='%.4f' % loss)
                # Values from the last discriminator update of this step.
                writer.add_scalar('loss', loss, step)
                writer.add_scalar('loss_gp', loss_gp, step)

                # Generator
                z = torch.randn(FLAGS.batch_size * 2, FLAGS.z_dim).to(device)
                loss = loss_fn(net_D(net_G(z)))

                optim_G.zero_grad()
                loss.backward()
                optim_G.step()

                # Post-process the joint (G, D) state with aa_wrk and load
                # the result back into both networks.
                fp = combine(net_G, net_D)
                fp = aa_wrk.apply(fpprev, fp)
                fpprev = fp.detach().clone()
                divide(fp, sum_G)
                sched_G.step()
                sched_D.step()

                if step == 1 or step % FLAGS.sample_step == 0:
                    # No autograd graph is needed for preview samples.
                    with torch.no_grad():
                        fake = net_G(sample_z).cpu()
                    grid = (make_grid(fake) + 1) / 2
                    writer.add_image('sample', grid, step)
                    save_image(grid, os.path.join(
                        FLAGS.logdir, 'sample', '%d.png' % step))

                if step == 1 or step % FLAGS.eval_step == 0:
                    torch.save({
                        'net_G': net_G.state_dict(),
                        'net_D': net_D.state_dict(),
                        'optim_G': optim_G.state_dict(),
                        'optim_D': optim_D.state_dict(),
                        'sched_G': sched_G.state_dict(),
                        'sched_D': sched_D.state_dict(),
                    }, os.path.join(FLAGS.logdir, 'model.pt'))
                    if FLAGS.record:
                        imgs = generate_imgs(
                            net_G, device, FLAGS.z_dim, 50000,
                            FLAGS.batch_size)
                        IS, FID = get_inception_score_and_fid(
                            imgs, FLAGS.fid_cache, verbose=True)
                        pbar.write(
                            "%s/%s Inception Score: %.3f(%.5f), "
                            "FID Score: %6.3f" % (
                                step, FLAGS.total_steps, IS[0], IS[1], FID))
                        writer.add_scalar('Inception_Score', IS[0], step)
                        writer.add_scalar('Inception_Score_std', IS[1], step)
                        writer.add_scalar('FID', FID, step)
                        passed = time.time() - start
                        inception_writter.writerow(
                            (step, IS[0], FID, passed))
                        # Flush so results survive an interrupted run.
                        inception_f.flush()
    finally:
        writer.close()


def main(argv):
    """absl entry point: seed the RNGs, then generate or train.

    Args:
        argv: positional command-line arguments passed by ``app.run``;
            unused here.
    """
    set_seed(FLAGS.seed)
    # --generate selects image generation; otherwise run training.
    entry_point = generate if FLAGS.generate else train
    entry_point()


# Script entry point: absl parses the command-line flags and then calls main.
if __name__ == '__main__':
    app.run(main)
