import argparse
import logging
import sys

from torch import optim

from disvae import Trainer, Evaluator
from disvae.models.anneal import get_anneal
from disvae.models.losses import LOSSES, RECON_DIST, get_loss_f
from disvae.models.vae import MODELS
from disvae.utils.modelIO import save_model, load_model, load_metadata
from utils.datasets import get_dataloaders, DATASETS
from utils.helpers import (get_device, set_seed, get_config_section, FormatterNoDuplicate)
from utils.visualize import GifTraversalsTraining

# INI file whose "Custom" section supplies the default argument values.
CONFIG_FILE = 'hyperparam.ini'
# Level names offered as choices for --log_level, taken from the logging
# module's own level table.
LOG_LEVELS = [*logging._levelToName.values()]


def _str2bool(value):
    """Convert a command line token into a real boolean.

    `type=bool` must not be used with argparse: `bool("False")` is `True`
    because any non-empty string is truthy.

    Parameters
    ----------
    value: str or bool
        Token to convert. Booleans are returned unchanged.

    Returns
    -------
    bool

    Raises
    ------
    argparse.ArgumentTypeError
        If `value` is not a recognised spelling of true / false.
    """
    if isinstance(value, bool):
        return value

    normalized = str(value).strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string is kept as False, which is what `bool("")` returned.
    if normalized in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        "Boolean value expected, got {!r}.".format(value))


def parse_arguments(args_to_parse):
    """Parse the command line arguments.

    Default values are read from the "Custom" section of `CONFIG_FILE`.

    Parameters
    ----------
    args_to_parse: list of str
        Arguments to parse (split on whitespace).

    Returns
    -------
    argparse.Namespace
        The parsed arguments.
    """
    default_config = get_config_section([CONFIG_FILE], "Custom")

    description = "PyTorch implementation and evaluation of disentangled Variational AutoEncoders and metrics."
    parser = argparse.ArgumentParser(description=description,
                                     formatter_class=FormatterNoDuplicate)

    # General options
    general = parser.add_argument_group('General options')
    general.add_argument('--name', type=str,
                         help="Name of the model for storing and loading purposes.")
    general.add_argument('-L', '--log_level', help="Logging levels.",
                         default=default_config['log_level'], choices=LOG_LEVELS)
    general.add_argument('--no_progress_bar', action='store_true',
                         default=default_config['no_progress_bar'],
                         help='Disables progress bar.')
    general.add_argument('--device',
                         default=default_config['device'],
                         help='cuda:n,or cpu')
    general.add_argument('-s', '--seed', type=int, default=default_config['seed'],
                         help='Random seed. Can be `None` for stochastic behavior.')
    general.add_argument('--monitor', action='store_true',
                         help='monitor the training process')

    # NOTE(review): this help text does not seem to describe a learning-rate
    # decay and the default is hard-coded rather than read from the config
    # file -- confirm both are intended.
    general.add_argument('--lr_decay', type=float, default=0.05,
                         help='Strength of pressure.')
    # Learning options
    training = parser.add_argument_group('Training specific options')
    training.add_argument('--checkpoint_every',
                          type=int, default=default_config['checkpoint_every'],
                          help='Save a checkpoint of the trained model every n epoch.')
    training.add_argument('-d', '--dataset', help="Path to training data.",
                          default=default_config['dataset'], choices=DATASETS)

    training.add_argument('-e', '--epochs', type=int,
                          default=default_config['epochs'],
                          help='Maximum number of epochs to run for.')
    training.add_argument('-b', '--batch_size', type=int,
                          default=default_config['batch_size'],
                          help='Batch size for training.')
    training.add_argument('--lr', type=float, default=default_config['lr'],
                          help='Learning rate.')

    # Model Options
    model = parser.add_argument_group('Model specfic options')
    model.add_argument('-m', '--model_type',
                       default=default_config['model'], choices=MODELS,
                       help='Type of encoder and decoder to use.')
    model.add_argument('-z', '--latent_dim', type=int,
                       default=default_config['latent_dim'],
                       help='Dimension of the latent variable.')
    model.add_argument('-l', '--loss',
                       default=default_config['loss'], choices=LOSSES,
                       help="Type of VAE loss function to use.")
    model.add_argument('-r', '--rec_dist', default=default_config['rec_dist'],
                       choices=RECON_DIST,
                       help="Form of the likelihood ot use for each pixel.")

    # Loss Specific Options
    betaH = parser.add_argument_group('BetaH specific parameters')

    betaH.add_argument('--betaH_B', type=float,
                       default=default_config['betaH_B'],
                       help="Weight of the KL (beta in the paper).")

    betaB = parser.add_argument_group('BetaB specific parameters')
    betaB.add_argument('--betaB_initC', type=float,
                       default=default_config['betaB_initC'],
                       help="Starting annealed capacity.")
    betaB.add_argument('--betaB_finC', type=float,
                       default=default_config['betaB_finC'],
                       help="Final annealed capacity.")
    betaB.add_argument('--betaB_G', type=float,
                       default=default_config['betaB_G'],
                       help="Weight of the KL divergence term (gamma in the paper).")

    factor = parser.add_argument_group('factor VAE specific parameters')
    factor.add_argument('--factor_G', type=float,
                        default=default_config['factor_G'],
                        help="Weight of the TC term (gamma in the paper).")
    factor.add_argument('--lr_disc', type=float,
                        default=default_config['lr_disc'],
                        help='Learning rate of the discriminator.')

    btcvae = parser.add_argument_group('beta-tcvae specific parameters')
    btcvae.add_argument('--btcvae_A', type=float,
                        default=default_config['btcvae_A'],
                        help="Weight of the MI term (alpha in the paper).")
    btcvae.add_argument('--btcvae_G', type=float,
                        default=default_config['btcvae_G'],
                        help="Weight of the dim-wise KL term (gamma in the paper).")
    btcvae.add_argument('--btcvae_B', type=float,
                        default=default_config['btcvae_B'],
                        help="Weight of the TC term (beta in the paper).")

    # Evaluation options
    # The boolean options below take an explicit value (e.g. `--no_test
    # false`). They are parsed with `_str2bool` because `type=bool` would
    # turn every non-empty string, including "False", into True.
    evaluation = parser.add_argument_group('Evaluation specific options')
    evaluation.add_argument('--is_eval_only', type=_str2bool,
                            default=default_config['is_eval_only'],
                            help='Whether to only evaluate using precomputed model `name`.')
    evaluation.add_argument('--is_metrics', type=_str2bool,
                            default=default_config['is_metrics'],
                            help="Whether to compute the disentangled metrcics. Currently only possible with `dsprites` as it is the only dataset with known true factors of variations.")
    evaluation.add_argument('--no_test', type=_str2bool,
                            default=default_config['no_test'],
                            help="Whether not to compute the test losses.`")
    evaluation.add_argument('--eval_batchsize', type=int,
                            default=default_config['eval_batchsize'],
                            help='Batch size for evaluation.')

    return parser.parse_args(args_to_parse)


def main(args):
    """Refine a stored model, save it again and evaluate it.

    The model and its metadata are loaded from the directory `args.name`.
    Only `model.decoder.parameters()` are handed to the optimizer. Training
    and evaluation both use the 'btcvae' loss.

    Parameters
    ----------
    args: argparse.Namespace
        Arguments
    """
    import wandb

    # Console logging: the logger and its handler share the requested level.
    log_level = args.log_level.upper()
    console = logging.StreamHandler()
    console.setLevel(log_level)
    console.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s - %(funcName)s: %(message)s',
                          "%H:%M:%S"))
    logger = logging.getLogger(__name__)
    logger.setLevel(log_level)
    logger.addHandler(console)

    set_seed(args.seed)
    device = get_device(args.device)

    # The experiment directory is used for loading, checkpoints, wandb files
    # and the final save.
    run_dir = args.name
    wandb.init(config=args, project='refine', dir=run_dir)
    logger.info("Root directory for saving and loading experiments: {}".format(run_dir))

    model = load_model(run_dir, device)
    metadata = load_metadata(run_dir)

    # ----- refine -----
    train_loader = get_dataloaders(args.dataset,
                                   num_workers=4,
                                   batch_size=args.batch_size,
                                   logger=logger)

    # Only the decoder's parameters are given to the optimizer.
    decoder_params = model.decoder.parameters()
    optimizer = optim.Adam(decoder_params, lr=args.lr, weight_decay=0)

    anneal = get_anneal('constant', 1, 1, 1)

    # make sure trainer and viz on same device
    model = model.to(device)

    gif_visualizer = GifTraversalsTraining(model, args.dataset, run_dir)
    n_train = len(train_loader.dataset)
    train_loss = get_loss_f('btcvae', anneal, n_data=n_train, **vars(args))

    show_progress = not args.no_progress_bar
    trainer = Trainer(model, optimizer, train_loss,
                      device=device,
                      logger=logger,
                      save_dir=run_dir,
                      is_progress_bar=show_progress,
                      gif_visualizer=gif_visualizer)
    trainer(train_loader,
            epochs=args.epochs,
            checkpoint_every=args.checkpoint_every)

    gif_visualizer.save_reset()

    # Save model and experiment information.
    save_model(trainer.model, run_dir, metadata=vars(args))

    # ----- evaluate -----
    # TODO: currently uses train dataset
    test_loader = get_dataloaders(metadata["dataset"],
                                  batch_size=args.eval_batchsize,
                                  num_workers=0,
                                  shuffle=False,
                                  logger=logger)

    n_test = len(test_loader.dataset)
    eval_loss = get_loss_f('btcvae', anneal, n_data=n_test, **vars(args))

    evaluator = Evaluator(model, eval_loss,
                          device=device,
                          logger=logger,
                          save_dir=run_dir,
                          is_progress_bar=show_progress)
    evaluator(test_loader,
              is_metrics=args.is_metrics,
              is_losses=not args.no_test)


if __name__ == '__main__':
    # Script entry point: parse everything after the program name and run.
    main(parse_arguments(sys.argv[1:]))
