import numpy as np
import torch
import matplotlib.pyplot as plt
import shutil

from tqdm import tqdm

plt.switch_backend('agg')

def adjust_learning_rate(accelerator, optimizer, scheduler, epoch, args, printout=True):
    """Set the learning rate of every optimizer param group for ``epoch``.

    The schedule is selected by ``args.lradj``:

    * ``'type1'``    -- ``args.learning_rate * 0.5 ** (epoch - 1)``
    * ``'type2'``    -- fixed table; only epochs 2, 4, 6, 8, 10, 15 and 20
      change the learning rate, every other epoch is a no-op
    * ``'type3'``    -- ``args.learning_rate`` while ``epoch < 3``, afterwards
      ``args.learning_rate * 0.9 ** (epoch - 3)``
    * ``'PEMS'``     -- ``args.learning_rate * 0.95 ** epoch``
    * ``'TST'``      -- ``scheduler.get_last_lr()[0]``
    * ``'constant'`` -- ``args.learning_rate``

    Args:
        accelerator: Object exposing ``print``; when ``None`` the builtin
            ``print`` is used instead.
        optimizer: Object exposing ``param_groups`` (a list of dicts); the
            ``'lr'`` entry of each group is overwritten.
        scheduler: Only used by the ``'TST'`` policy.
        epoch: Current epoch number.
        args: Namespace providing ``lradj`` and, for most policies,
            ``learning_rate``.
        printout: When true, report the new learning rate.

    Raises:
        ValueError: If ``args.lradj`` is not one of the policies above.
            (Previously this surfaced as an ``UnboundLocalError``.)
    """
    policy = args.lradj
    if policy == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif policy == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8,
        }
    elif policy == 'type3':
        lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
    elif policy == 'PEMS':
        lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))}
    elif policy == 'TST':
        lr_adjust = {epoch: scheduler.get_last_lr()[0]}
    elif policy == 'constant':
        lr_adjust = {epoch: args.learning_rate}
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError('Unknown learning rate adjustment policy: {!r}'.format(policy))

    # Only the table-driven 'type2' policy can miss here.
    if epoch not in lr_adjust:
        return

    lr = lr_adjust[epoch]
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    if printout:
        emit = print if accelerator is None else accelerator.print
        emit('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    """Track validation loss and signal when training should stop.

    Call the instance once per validation round with the current loss. When
    the loss has not improved for ``patience`` consecutive calls,
    ``early_stop`` is set to ``True``. On every improvement the counter is
    reset and, if ``save_mode`` is enabled, a checkpoint is written.
    """

    def __init__(self, accelerator=None, patience=7, verbose=False, delta=0, save_mode=True):
        """Configure the stopper.

        Args:
            accelerator: Optional object exposing ``print`` and
                ``unwrap_model``; when ``None`` the builtin ``print`` is used
                and the model is saved as passed in.
            patience: Number of consecutive non-improving calls before
                ``early_stop`` is set.
            verbose: When true, report each checkpoint save.
            delta: Margin added to the best score when deciding whether a
                new score counts as an improvement.
            save_mode: When false, no checkpoint is ever written.
        """
        self.accelerator = accelerator
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # ``np.inf`` rather than the ``np.Inf`` alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf
        self.delta = delta
        self.save_mode = save_mode

    def __call__(self, val_loss, model, path):
        """Record ``val_loss`` and update counter / checkpoint accordingly.

        Args:
            val_loss: Validation loss for the current round (lower is better).
            model: Model handed to ``save_checkpoint`` on improvement.
            path: Directory handed to ``save_checkpoint`` on improvement.
        """
        # Scores are negated losses so that "higher is better".
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            if self.save_mode:
                self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self._log(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            if self.save_mode:
                self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def _log(self, message):
        """Print via the accelerator when one is configured, else builtin print."""
        if self.accelerator is None:
            print(message)
        else:
            self.accelerator.print(message)

    def save_checkpoint(self, val_loss, model, path):
        """Save the filtered state dict to ``path + '/checkpoint'``.

        The model is first unwrapped through the accelerator when one is
        configured, then its state dict is filtered by ``remove_llm``.
        ``val_loss_min`` is updated to ``val_loss`` afterwards.
        """
        if self.verbose:
            self._log(
                f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')

        if self.accelerator is not None:
            model = self.accelerator.unwrap_model(model)
        state_dict = self.remove_llm(model.state_dict())
        torch.save(state_dict, path + '/' + 'checkpoint')
        self.val_loss_min = val_loss

    def remove_llm(self, state_dict):
        """Return a copy of ``state_dict`` without 'llama' / 'moirai' entries.

        Any key containing the substring ``'llama'`` or ``'moirai'`` is
        dropped; all other items are kept in their original order.
        """
        return {
            key: value
            for key, value in state_dict.items()
            if 'llama' not in key and 'moirai' not in key
        }


def del_files(dir_path):
    """Recursively delete the directory tree rooted at ``dir_path``.

    Thin wrapper around ``shutil.rmtree`` with errors not ignored, so any
    exception raised by ``rmtree`` propagates to the caller.
    """
    shutil.rmtree(dir_path, ignore_errors=False)

def vali(args, accelerator, model, vali_data, vali_loader, criterion, mae_metric):
    """Evaluate ``model`` on ``vali_loader`` and return averaged metrics.

    The model is switched to eval mode for the loop and back to train mode
    before returning. Each batch is moved to ``accelerator.device``, passed
    through the model (under ``torch.cuda.amp.autocast`` when
    ``args.use_amp`` is set), cropped to the last ``args.pred_len`` steps and
    scored with ``criterion`` and ``mae_metric``.

    Args:
        args: Namespace providing ``use_amp``, ``output_attention``,
            ``features`` and ``pred_len``.
        accelerator: Object whose ``device`` attribute is the target device.
        model: Callable returning ``(outputs, cali_conf, saliency, entropy)``,
            or a sequence whose first element is that 4-tuple when
            ``args.output_attention`` is set.
        vali_data: Unused by this function.
        vali_loader: Iterable of ``(batch_x, batch_y, batch_x_mark,
            batch_y_mark)`` batches.
        criterion: Callable ``(pred, true) -> loss``.
        mae_metric: Callable ``(pred, true) -> (mae_loss, top_1, top_5)``.

    Returns:
        Tuple ``(mean_loss, mean_mae_loss, records, mean_top_1, mean_top_5)``
        where ``records`` is a list with one dict per batch holding the
        inputs, marks and model outputs converted to Python lists.
    """

    def run_model(x, x_mark, y_mark):
        # The decoder-input slot is always passed as None.
        if args.output_attention:
            return model(x, x_mark, None, y_mark)[0]
        return model(x, x_mark, None, y_mark)

    loss_values = []
    mae_values = []
    top_1_values = []
    top_5_values = []
    records = []

    model.eval()
    # NOTE(review): the loop is not wrapped in torch.no_grad(); presumably the
    # model needs autograd during evaluation -- confirm before adding it.
    for _, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(vali_loader)):
        batch_x = batch_x.float().to(accelerator.device)
        batch_y = batch_y.float()

        if args.use_amp:
            with torch.cuda.amp.autocast():
                outputs, cali_conf, saliency, entropy = run_model(batch_x, batch_x_mark, batch_y_mark)
        else:
            outputs, cali_conf, saliency, entropy = run_model(batch_x, batch_x_mark, batch_y_mark)

        # 'MS' keeps only the last feature channel; otherwise keep all.
        first_feature = -1 if args.features == 'MS' else 0
        horizon = args.pred_len
        outputs = outputs[:, -horizon:, first_feature:]
        batch_y = batch_y[:, -horizon:, first_feature:].to(accelerator.device)
        cali_conf = cali_conf[:, -horizon:]

        pred = outputs.detach()
        records.append({
            'phase_plate': list(batch_x_mark),
            'target_size': [mark.detach().cpu().tolist() for mark in batch_y_mark],
            'input': [sample.detach().cpu().tolist() for sample in batch_x],
            'output': pred.detach().cpu().squeeze().tolist(),
            'cali_conf': cali_conf.detach().cpu().tolist(),
            'saliency': saliency.detach().cpu().tolist(),
            'entropy': entropy.detach().cpu().tolist(),
        })
        true = batch_y.detach()

        loss = criterion(pred, true)
        mae_loss, top_1, top_5 = mae_metric(pred, true)

        top_1_values.append(top_1.item())
        top_5_values.append(top_5.item())
        loss_values.append(loss.item())
        mae_values.append(mae_loss.item())

    mean_loss = np.average(loss_values)
    mean_mae_loss = np.average(mae_values)
    mean_top_1 = np.average(top_1_values)
    mean_top_5 = np.average(top_5_values)

    model.train()
    return mean_loss, mean_mae_loss, records, mean_top_1, mean_top_5

def test(args, model, test_loader, criterion, mae_metric, device):
    """Evaluate ``model`` on ``test_loader`` and return averaged metrics.

    The model is switched to eval mode for the loop and back to train mode
    before returning. Each batch is moved to ``device``, passed through the
    model (under ``torch.cuda.amp.autocast`` when ``args.use_amp`` is set),
    cropped to the last ``args.pred_len`` steps and scored with ``criterion``
    and ``mae_metric``.

    Args:
        args: Namespace providing ``use_amp``, ``output_attention``,
            ``features`` and ``pred_len``.
        model: Callable returning ``(outputs, cali_conf, saliency, entropy)``,
            or a sequence whose first element is that 4-tuple when
            ``args.output_attention`` is set.
        test_loader: Iterable of ``(batch_x, batch_y, batch_x_mark,
            batch_y_mark)`` batches.
        criterion: Callable ``(pred, true) -> loss``.
        mae_metric: Callable ``(pred, true) -> (mae_loss, top_1, top_5)``.
        device: Device the inputs and targets are moved to.

    Returns:
        Tuple ``(mean_loss, mean_mae_loss, records, mean_top_1, mean_top_5)``
        where ``records`` is a list with one dict per batch holding the
        inputs, marks and model outputs converted to Python lists.
    """

    def run_model(x, x_mark, y_mark):
        # The decoder-input slot is always passed as None.
        if args.output_attention:
            return model(x, x_mark, None, y_mark)[0]
        return model(x, x_mark, None, y_mark)

    loss_values = []
    mae_values = []
    top_1_values = []
    top_5_values = []
    records = []

    model.eval()
    # NOTE(review): the loop is not wrapped in torch.no_grad(); presumably the
    # model needs autograd during evaluation -- confirm before adding it.
    for _, (batch_x, batch_y, batch_x_mark, batch_y_mark) in tqdm(enumerate(test_loader)):
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float()

        if args.use_amp:
            with torch.cuda.amp.autocast():
                outputs, cali_conf, saliency, entropy = run_model(batch_x, batch_x_mark, batch_y_mark)
        else:
            outputs, cali_conf, saliency, entropy = run_model(batch_x, batch_x_mark, batch_y_mark)

        # 'MS' keeps only the last feature channel; otherwise keep all.
        first_feature = -1 if args.features == 'MS' else 0
        horizon = args.pred_len
        outputs = outputs[:, -horizon:, first_feature:]
        batch_y = batch_y[:, -horizon:, first_feature:].to(device)
        cali_conf = cali_conf[:, -horizon:]

        pred = outputs.detach()
        records.append({
            'phase_plate': list(batch_x_mark),
            'target_size': [mark.detach().cpu().tolist() for mark in batch_y_mark],
            'input': [sample.detach().cpu().tolist() for sample in batch_x],
            'output': pred.detach().cpu().squeeze().tolist(),
            'cali_conf': cali_conf.detach().cpu().tolist(),
            'saliency': saliency.detach().cpu().tolist(),
            'entropy': entropy.detach().cpu().tolist(),
        })
        true = batch_y.detach()

        loss = criterion(pred, true)
        mae_loss, top_1, top_5 = mae_metric(pred, true)

        top_1_values.append(top_1.item())
        top_5_values.append(top_5.item())
        loss_values.append(loss.item())
        mae_values.append(mae_loss.item())

    mean_loss = np.average(loss_values)
    mean_mae_loss = np.average(mae_values)
    mean_top_1 = np.average(top_1_values)
    mean_top_5 = np.average(top_5_values)

    model.train()
    return mean_loss, mean_mae_loss, records, mean_top_1, mean_top_5