import datetime
import math

import torch
from logging import getLogger

from torch.cuda.amp import GradScaler

from CVRPEnv import CVRPEnv as Env
from CVRPModel import CVRPModel as Model
from torch import autocast

from torch.optim import Adam as Optimizer
from torch.optim.lr_scheduler import MultiStepLR as Scheduler
import torch.nn as nn

from utils.utils import *


class CVRPTrainer:
    def __init__(self,
                 env_params,
                 model_params,
                 optimizer_params,
                 trainer_params):
        """Build the model, environment, optimizer and LR scheduler.

        Args:
            env_params: Keyword arguments forwarded to ``CVRPEnv``.
            model_params: Keyword arguments forwarded to ``CVRPModel``.
            optimizer_params: Dict whose ``'optimizer'`` entry holds keyword
                arguments for Adam and whose ``'scheduler'`` entry holds
                keyword arguments for MultiStepLR.
            trainer_params: Training configuration. This constructor reads
                ``'use_cuda'``, ``'cuda_device_num'`` (only when CUDA is
                enabled) and ``'model_load'`` (keys ``'enable'``, ``'path'``
                and ``'epoch'``).
        """

        # save arguments
        self.env_params = env_params
        self.model_params = model_params
        self.optimizer_params = optimizer_params
        self.trainer_params = trainer_params

        # result folder, logger
        self.logger = getLogger(name='trainer')
        self.result_folder = get_result_folder()
        self.result_log = LogData()

        # cuda
        use_cuda = self.trainer_params['use_cuda']
        if use_cuda:
            cuda_device_num = self.trainer_params['cuda_device_num']
            torch.cuda.set_device(cuda_device_num)
            device = torch.device('cuda', cuda_device_num)
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            device = torch.device('cpu')
            torch.set_default_tensor_type('torch.FloatTensor')
        # Keep the resolved device on the instance: _train_one_batch1 reads
        # self.device, which previously was never assigned.
        self.device = device

        # Main Components
        self.model = Model(**self.model_params)
        self.env = Env(**self.env_params)
        self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer'])
        self.scheduler = Scheduler(self.optimizer, **self.optimizer_params['scheduler'])
        if torch.cuda.device_count() > 1:
            # NOTE(review): this only announces the GPU count; the
            # DataParallel wrapping below is disabled, so a single device is used.
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        # self.model = nn.DataParallel(self.model)

        # Restore
        self.start_epoch = 1
        model_load = trainer_params['model_load']
        if model_load['enable']:
            checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
            # SECURITY: torch.load unpickles the file; only load checkpoints
            # that come from a trusted source.
            checkpoint = torch.load(checkpoint_fullname, map_location=device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.start_epoch = 1 + model_load['epoch']
            self.result_log.set_raw_data(checkpoint['result_log'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            # The scheduler is rewound by hand instead of loading the saved
            # 'scheduler_state_dict', so the milestones/gamma passed to this
            # constructor stay in effect for the resumed run.
            # NOTE(review): run() steps the scheduler before each epoch, so
            # this resumes one scheduler step behind an uninterrupted run --
            # confirm whether that offset is intended.
            self.scheduler.last_epoch = model_load['epoch']-1
            self.logger.info('Saved Model Loaded !!')

        # utility
        self.time_estimator = TimeEstimator()

    def run(self):
        """Train from ``self.start_epoch`` through ``trainer_params['epochs']``.

        For every epoch this steps the LR scheduler, trains one epoch via
        ``_train_one_epoch``, records the returned score and loss in
        ``self.result_log`` and logs elapsed/remaining time estimates. A
        checkpoint (epoch number, model/optimizer/scheduler state and the raw
        result log) is written to ``self.result_folder`` on the last epoch and
        whenever the epoch is a multiple of
        ``trainer_params['logging']['model_save_interval']``.

        The image-saving branches that used to live here were permanently
        disabled (``if False:``) and have been removed, together with the
        ``img_save_interval`` lookup that only they used.
        """
        total_epochs = self.trainer_params['epochs']
        self.time_estimator.reset(self.start_epoch)
        # self.lr_decay_epoch = self.optimizer_params['lr_decay_epoch']

        # Gradient scaling only makes sense for CUDA mixed precision; creating
        # an enabled scaler on a CPU-only run just produces a warning.
        scaler = GradScaler(enabled=bool(self.trainer_params['use_cuda']))

        for epoch in range(self.start_epoch, total_epochs + 1):
            self.logger.info('=================================================================')

            # LR Decay
            # The scheduler is advanced before the epoch's training. PyTorch
            # recommends stepping after optimizer.step(), but moving the call
            # would shift every milestone by one epoch, so the order is kept.
            self.scheduler.step()

            # Train
            train_score, train_loss = self._train_one_epoch(epoch, scaler)

            self.result_log.append('train_score', epoch, train_score)
            self.result_log.append('train_loss', epoch, train_loss)

            ############################
            # Logs & Checkpoint
            ############################
            elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, total_epochs)
            self.logger.info("Epoch {:3d}/{:3d}: Time Est.: Elapsed[{}], Remain[{}]".format(
                epoch, total_epochs, elapsed_time_str, remain_time_str))

            all_done = (epoch == total_epochs)
            model_save_interval = self.trainer_params['logging']['model_save_interval']

            # Save Model
            if all_done or (epoch % model_save_interval) == 0:
                self.logger.info("Saving trained_model")
                checkpoint_dict = {
                    'epoch': epoch,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'scheduler_state_dict': self.scheduler.state_dict(),
                    'result_log': self.result_log.get_raw_data()
                }
                torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch))

            # All-done announcement
            if all_done:
                self.logger.info(" *** Training Done *** ")
                self.logger.info("Now, printing log array...")
                util_print_log_array(self.logger, self.result_log)

    def _train_one_batch1(self, problem_size, batch_size, capacity, epoch):
        """Train on one freshly generated batch using the staged loss.

        Variant of the batch step that generates problems through
        ``env.load_problems_cvrp`` and feeds ``env.get_local_feature()`` to
        the model at every decoding step. Before ``lr_decay_epoch`` the plain
        POMO REINFORCE loss is used; from that epoch on, an extra term
        weighted by ``trainer_params['beta']`` is added for the
        ``trainer_params['k_value']`` trajectories with the largest advantage.

        Args:
            problem_size: Problem size passed to the environment; also used
                as the POMO size.
            batch_size: Number of instances in the batch.
            capacity: Capacity value passed to the environment.
            epoch: Current epoch, compared against ``lr_decay_epoch``.

        Returns:
            Tuple ``(score_mean, loss_mean)`` of Python floats.

        Raises:
            KeyError: If no ``lr_decay_epoch`` attribute is set and
                ``optimizer_params`` has no ``'lr_decay_epoch'`` entry.
        """

        # Prep
        ###############################################
        self.model.train()

        # self.device / self.lr_decay_epoch are not guaranteed to be set on the
        # trainer, so fall back to values that can be derived locally instead
        # of failing with AttributeError.
        device = getattr(self, 'device', None)
        if device is None:
            device = next(self.model.parameters()).device
        lr_decay_epoch = getattr(self, 'lr_decay_epoch', None)
        if lr_decay_epoch is None:
            lr_decay_epoch = self.optimizer_params['lr_decay_epoch']

        pomo_size = problem_size
        self.env.load_problems_cvrp(batch_size,
                                    problem_size=problem_size,
                                    capacity=capacity,
                                    pomo_size=pomo_size,
                                    device=device)
        reset_state, _, _ = self.env.reset()
        self.model.pre_forward(reset_state)

        prob_list = torch.zeros(size=(batch_size, pomo_size, 0))
        # shape: (batch, pomo, 0~problem)

        # POMO Rollout
        ###############################################
        state, reward, done = self.env.pre_step()

        while not done:
            cur_dist = self.env.get_local_feature()
            selected, prob = self.model(state, cur_dist)
            # shape: (batch, pomo)
            state, reward, done = self.env.step(selected)
            prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)

        # Loss
        ###############################################
        advantage = reward - reward.float().mean(dim=1, keepdims=True)
        # shape: (batch, pomo)
        log_prob = prob_list.log().sum(dim=2)
        # size = (batch, pomo)
        loss = -advantage * log_prob  # Minus Sign: To Increase REWARD
        # shape: (batch, pomo)
        if epoch < lr_decay_epoch:
            # In stage 1 and stage 2, we use all trajectories to train the model
            ###############################################
            loss_mean = loss.mean()
        else:
            # In stage 3, we use topk advantage to improve the performance
            ###############################################
            # get topk with the biggest advantage(best reward)
            topk_advantage, topk_advantage_index = torch.topk(advantage, k=self.trainer_params['k_value'],
                                                              dim=-1, largest=True, sorted=True)
            # shape: (batch,k)
            topk_advantage_log_prob = log_prob.gather(dim=-1, index=topk_advantage_index)
            loss_best = -topk_advantage * topk_advantage_log_prob  # Minus Sign: To Increase REWARD
            # shape: (batch, k)
            beta = self.trainer_params['beta']
            loss_mean = loss.mean() + beta * loss_best.mean()

        # Score
        ###############################################
        max_pomo_reward, _ = reward.max(dim=1)  # get best results from pomo
        score_mean = -max_pomo_reward.float().mean()  # negative sign to make positive value

        # Step & Return
        ###############################################
        self.optimizer.zero_grad()
        loss_mean.backward()
        # Gradient Clipping by torch.nn.utils.clip_grad_norm_ (for avoiding gradient explosion)
        max_norm = self.trainer_params['max_norm']
        torch.nn.utils.clip_grad_norm_(parameters=self.model.parameters(), max_norm=max_norm)
        self.optimizer.step()
        return score_mean.item(), loss_mean.item()

    def _train_one_epoch(self, epoch, scaler):
        """Train on ``trainer_params['train_episodes']`` episodes for one epoch.

        Episodes are consumed in batches of at most
        ``trainer_params['train_batch_size']``; the last batch is shrunk to
        the number of episodes still outstanding. Per-batch score and loss are
        folded into running averages weighted by the batch size.

        Args:
            epoch: Current epoch number (used for logging and forwarded to
                ``_train_one_batch``).
            scaler: Object forwarded unchanged to ``_train_one_batch``.

        Returns:
            Tuple ``(score_avg, loss_avg)`` taken from the two average meters.
        """
        score_meter = AverageMeter()
        loss_meter = AverageMeter()

        total_episodes = self.trainer_params['train_episodes']
        episodes_done = 0
        batches_seen = 0

        while episodes_done < total_episodes:
            batch_size = min(self.trainer_params['train_batch_size'],
                             total_episodes - episodes_done)

            batch_score, batch_loss = self._train_one_batch(batch_size, scaler, epoch)
            score_meter.update(batch_score, batch_size)
            loss_meter.update(batch_loss, batch_size)
            episodes_done += batch_size

            # Per-batch progress is logged only during the first epoch of this
            # run, and only for its first 10000 batches.
            if epoch != self.start_epoch:
                continue
            batches_seen += 1
            if batches_seen <= 10000:
                percent_done = 100. * episodes_done / total_episodes
                self.logger.info('Epoch {:3d}: Train {:3d}/{:3d}({:1.1f}%)  Score: {:.4f},  Loss: {:.4f}'
                                 .format(epoch, episodes_done, total_episodes, percent_done,
                                         score_meter.avg, loss_meter.avg))

        # One summary line per epoch
        self.logger.info('Epoch {:3d}: Train ({:3.0f}%)  Score: {:.4f},  Loss: {:.4f}'
                         .format(epoch, 100. * episodes_done / total_episodes,
                                 score_meter.avg, loss_meter.avg))

        return score_meter.avg, loss_meter.avg

    def _train_one_batch(self, batch_size, scaler, epoch):
        """Run one POMO rollout on a fresh batch and apply one optimizer step.

        The loss is the REINFORCE loss over all trajectories plus
        ``trainer_params['beta']`` times the same loss restricted to the
        ``trainer_params['k_value']`` trajectories with the largest advantage.
        Gradients are clipped to ``trainer_params['max_norm']`` before the
        optimizer step.

        Args:
            batch_size: Number of problem instances to load into the env.
            scaler: Gradient scaler (``torch.cuda.amp.GradScaler``-like) or
                ``None``. It is used when the loss lives on a CUDA device, so
                that the float16 autocast forward pass is paired with loss
                scaling; otherwise a plain backward/step is performed.
            epoch: Current epoch (unused here; kept for interface
                compatibility).

        Returns:
            Tuple ``(score_mean, loss_mean)`` of Python floats, where the
            score is the negated mean of the best reward per instance.
        """

        # Report the model size once per trainer rather than on every batch.
        if not getattr(self, '_param_count_reported', False):
            total_params = sum(p.numel() for p in self.model.parameters())
            print(f"Total parameters: {total_params:,}")
            self._param_count_reported = True

        # Prep
        ###############################################
        if self.model_params["train_partition"]:
            # Freeze everything, then re-enable only the partition module and
            # the edge encoders of the encoder layers.
            self.model.eval()
            for param in self.model.parameters():
                param.requires_grad = False
            self.model.partition.train()
            for param in self.model.partition.parameters():  # or model.decoder.mlp.parameters()
                param.requires_grad = True
            for layer in self.model.encoder.layers:
                for param in layer.edge_encoder.parameters():
                    param.requires_grad = True
        else:
            self.model.train()
        self.env.load_problems(batch_size)
        reset_state, _, _ = self.env.reset()
        # pre_forward is not exposed by the DataParallel wrapper itself.
        if isinstance(self.model, torch.nn.DataParallel):
            self.model.module.pre_forward(reset_state)
        else:
            self.model.pre_forward(reset_state)

        prob_list = torch.zeros(size=(batch_size, self.env.pomo_size, 0))
        # shape: (batch, pomo, 0~problem)

        # POMO Rollout
        ###############################################
        state, reward, done = self.env.pre_step()
        # Autocast targets CUDA only; disable it explicitly on CPU runs so it
        # does not emit a warning on every batch.
        use_amp = torch.cuda.is_available() and bool(self.trainer_params.get('use_cuda', True))
        with autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
            while not done:
                selected, prob = self.model(state)
                # shape: (batch, pomo)
                state, reward, done = self.env.step(selected)
                prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)

        # Loss
        ###############################################
        advantage = reward - reward.float().mean(dim=1, keepdims=True)
        # shape: (batch, pomo)
        log_prob = prob_list.log().sum(dim=2)
        # size = (batch, pomo)
        loss = -advantage * log_prob  # Minus Sign: To Increase REWARD
        # shape: (batch, pomo)

        # Extra weight on the top-k trajectories (largest advantage = best reward)
        topk_advantage, topk_advantage_index = torch.topk(advantage, k=self.trainer_params['k_value'],
                                                          dim=-1, largest=True, sorted=True)
        # shape: (batch, k)
        topk_advantage_log_prob = log_prob.gather(dim=-1, index=topk_advantage_index)
        loss_best = -topk_advantage * topk_advantage_log_prob  # Minus Sign: To Increase REWARD
        # shape: (batch, k)
        beta = self.trainer_params['beta']
        loss_mean = loss.mean() + beta * loss_best.mean()

        # Score
        ###############################################
        max_pomo_reward, _ = reward.max(dim=1)  # get best results from pomo
        score_mean = -max_pomo_reward.float().mean()  # negative sign to make positive value

        # Step & Return
        ###############################################
        max_norm = self.trainer_params['max_norm']
        self.optimizer.zero_grad()
        if scaler is not None and loss_mean.is_cuda:
            # Mixed-precision path: scale the loss so float16 gradients do not
            # underflow, then unscale before clipping so max_norm applies to
            # the true gradient magnitudes.
            scaler.scale(loss_mean).backward()
            scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(parameters=self.model.parameters(), max_norm=max_norm)
            scaler.step(self.optimizer)
            scaler.update()
        else:
            loss_mean.backward()
            # Gradient Clipping by torch.nn.utils.clip_grad_norm_ (for avoiding gradient explosion)
            torch.nn.utils.clip_grad_norm_(parameters=self.model.parameters(), max_norm=max_norm)
            self.optimizer.step()
        return score_mean.item(), loss_mean.item()


def clip_grad_norms(param_groups, max_norm=math.inf):
    """Clip gradients per parameter group and report the norms.

    Each group's gradients are clipped in place to an L2 norm of ``max_norm``.
    When ``max_norm`` is not positive, no clipping is applied, but the norms
    are still computed (by clipping against infinity).

    :param param_groups: iterable of dicts with a ``'params'`` entry, e.g.
        ``optimizer.param_groups``
    :param max_norm: maximum L2 norm per group; a value ``<= 0`` disables clipping
    :return: ``(grad_norms, grad_norms_clipped)`` -- the pre-clipping norm of
        each group, and the same values capped at ``max_norm`` (the very same
        list when clipping is disabled)
    """
    clipping_enabled = max_norm > 0
    # Inf keeps the call (and thus the norm computation) without clipping.
    effective_max = max_norm if clipping_enabled else math.inf

    grad_norms = []
    for group in param_groups:
        group_norm = torch.nn.utils.clip_grad_norm_(
            group['params'],
            effective_max,
            norm_type=2
        )
        grad_norms.append(group_norm)

    if not clipping_enabled:
        return grad_norms, grad_norms

    grad_norms_clipped = [min(group_norm, max_norm) for group_norm in grad_norms]
    return grad_norms, grad_norms_clipped
