from __future__ import annotations

import inspect
import os
import time
from typing import Tuple

import numpy as np
import torch
import torch.nn as nn
from torch import optim

from data_provider.data_factory import data_provider
from .exp_basic import ExpBasic
from models import FACT
from utils.metrics import metric
from utils.tools import EarlyStopping, adjust_learning_rate


class ExpMain(ExpBasic):

    def __init__(self, args):
        """Forward ``args`` unchanged to the parent ``ExpBasic`` initialiser.

        NOTE(review): the other methods read ``self.args``, ``self.model`` and
        ``self.device``; presumably ``ExpBasic.__init__`` sets them — confirm.
        """
        super().__init__(args)

    def _build_model(self):
        """Construct ``FACT`` from ``self.args`` and return it after ``.float()``."""
        model = FACT(self.args)
        return model.float()

    def _get_data(self, flag: str):
        """Delegate to ``data_provider`` for the split named by ``flag``.

        Returns whatever ``data_provider(self.args, flag)`` returns; callers in
        this class unpack the result as a ``(dataset, loader)`` pair.
        """
        provided = data_provider(self.args, flag)
        return provided

    def _select_optimizer(self):
        return optim.Adam(self.model.parameters(), lr=self.args.learning_rate)

    def _select_criterion(self):
        return nn.MSELoss()

    def _load_state_dict(self, ckpt_path: str):

        load_kwargs = {'map_location': self.device}
        if 'weights_only' in inspect.signature(torch.load).parameters:
            load_kwargs['weights_only'] = True
        return torch.load(ckpt_path, **load_kwargs)

    def _prepare_batch(self, batch) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        batch_x, batch_y, batch_x_mark, batch_y_mark = batch

        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)

        if self.args.data == 'Solar':
            batch_x_mark = None
            batch_y_mark = None
        else:
            batch_x_mark = batch_x_mark.float().to(self.device)
            batch_y_mark = batch_y_mark.float().to(self.device)

        return batch_x, batch_y, batch_x_mark

    def _forward_model(self, batch_x, batch_y, batch_x_mark):
        outputs = self.model(batch_x, batch_x_mark)
        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, :, f_dim:]
        target = batch_y[:, -self.args.pred_len:, f_dim:]
        return outputs, target

    def vali(self, vali_data, vali_loader, criterion):
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for batch in vali_loader:
                batch_x, batch_y, batch_x_mark = self._prepare_batch(batch)
                outputs, target = self._forward_model(
                    batch_x, batch_y, batch_x_mark)
                loss = criterion(outputs, target)
                total_loss.append(loss.item())
        self.model.train()
        return np.average(total_loss)

    def _regularization_loss(self, lambda_coh, lambda_phase):
        """Return the weighted prior-matching penalty for the latest forward pass.

        Reads ``self.model.last_channel_priors`` and, when the model has it,
        ``self.model.get_last_channel_repr()``. If either is missing/``None``
        a zero tensor is returned.

        Two optional terms are accumulated:

        * coherence (``lambda_coh > 0`` and ``priors['gamma']`` is set): MSE
          between the row-wise correlation matrix of ``|rep|`` and ``gamma``;
        * phase (``lambda_phase > 0`` and ``priors['phi']`` is set): MSE
          between a max-normalised ``sin_i*cos_j - cos_i*sin_j`` matrix built
          from the mean sine/cosine of ``angle(rep)`` and ``phi``.

        The computation is best-effort: any exception is reported with a
        warning and whatever was accumulated up to that point is returned, so
        a failing regulariser never aborts the training step.
        """
        reg = torch.tensor(0.0, device=self.device)
        try:
            priors = getattr(self.model, 'last_channel_priors', None)
            rep = self.model.get_last_channel_repr() if hasattr(
                self.model, 'get_last_channel_repr') else None
            if priors is None or rep is None:
                return reg

            if lambda_coh > 0 and 'gamma' in priors and priors['gamma'] is not None:
                # Mean-centre and L2-normalise each row along the last axis,
                # so the Gram matrix holds correlation coefficients.
                magnitude = torch.abs(rep)
                centered = magnitude - magnitude.mean(dim=-1, keepdim=True)
                norm = centered.pow(2).sum(dim=-1, keepdim=True).sqrt() + 1e-8
                unit = centered / norm
                sim = torch.matmul(unit, unit.transpose(-1, -2))
                # Priors are targets only; keep gradients out of them.
                gamma = priors['gamma'].detach()
                coh_loss = torch.nn.functional.mse_loss(sim, gamma)
                reg = reg + lambda_coh * coh_loss

            if lambda_phase > 0 and 'phi' in priors and priors['phi'] is not None:
                theta = torch.angle(rep)
                sin_t = torch.sin(theta).mean(dim=-1)
                cos_t = torch.cos(theta).mean(dim=-1)
                phi_hat = torch.einsum(
                    'bi,bj->bij', sin_t, cos_t) - torch.einsum('bi,bj->bij', cos_t, sin_t)
                # Scale each matrix by its largest magnitude (epsilon avoids 0/0).
                max_abs = phi_hat.abs().amax(dim=(-2, -1), keepdim=True) + 1e-8
                phi_hat = phi_hat / max_abs
                phi = priors['phi'].detach()
                phase_loss = torch.nn.functional.mse_loss(phi_hat, phi)
                reg = reg + lambda_phase * phase_loss
        except Exception as exc:  # deliberate best-effort, see docstring
            print(
                f"Warning: regularizer computation skipped ({exc})")
        return reg

    def train(self, setting):
        """Train the model with early stopping and return it.

        Args:
            setting: Run name; the checkpoint directory is
                ``os.path.join(self.args.checkpoints, setting)`` and is
                created if missing.

        Returns:
            ``self.model`` after loading ``checkpoint.pth`` from the
            checkpoint directory (expected to be written by
            ``EarlyStopping`` — confirm against ``utils.tools``).

        Side effects:
            Sets ``self.last_avg_epoch_time`` to the mean wall-clock duration
            of the training part of each epoch (validation excluded), or
            ``None`` when no epoch ran. Prints progress to stdout.
        """
        _, train_loader = self._get_data('train')
        vali_data, vali_loader = self._get_data('val')
        test_data, test_loader = self._get_data('test')

        path = os.path.join(self.args.checkpoints, setting)
        os.makedirs(path, exist_ok=True)

        early_stopping = EarlyStopping(
            patience=self.args.patience, verbose=True)
        optimizer = self._select_optimizer()
        criterion = self._select_criterion()

        # Regulariser configuration does not change during training, so it is
        # resolved once instead of on every batch.
        lambda_coh = getattr(self.args, 'lambda_coh', 0.0)
        lambda_phase = getattr(self.args, 'lambda_phase', 0.0)
        use_regularizer = self.args.model == 'FACT' and (
            lambda_coh > 0 or lambda_phase > 0)

        epoch_times = []
        for epoch in range(self.args.train_epochs):
            train_loss = []
            epoch_start = time.time()
            self.model.train()

            for i, batch in enumerate(train_loader):
                optimizer.zero_grad()

                batch_x, batch_y, batch_x_mark = self._prepare_batch(batch)
                outputs, target = self._forward_model(
                    batch_x, batch_y, batch_x_mark)

                loss = criterion(outputs, target)
                if use_regularizer:
                    # Must run right after the forward pass: the helper reads
                    # state the model stored during that pass.
                    loss = loss + self._regularization_loss(
                        lambda_coh, lambda_phase)

                loss.backward()
                optimizer.step()

                loss_value = loss.item()
                train_loss.append(loss_value)

                if (i + 1) % 100 == 0:
                    print(
                        f"\titers: {i + 1}, epoch: {epoch + 1} | loss: {loss_value:.7f}")

            # Timed before validation so only the optimisation pass is counted.
            epoch_times.append(time.time() - epoch_start)
            train_loss_value = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print(
                f"Epoch: {epoch + 1}, Steps: {len(train_loader)} | "
                f"Train Loss: {train_loss_value:.7f} Vali Loss: {vali_loss:.7f} Test Loss: {test_loss:.7f}"
            )

            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping triggered")
                break

            adjust_learning_rate(optimizer, epoch + 1, self.args)

        best_model_path = os.path.join(path, 'checkpoint.pth')
        self.model.load_state_dict(self._load_state_dict(best_model_path))

        self.last_avg_epoch_time = float(
            sum(epoch_times) / len(epoch_times)) if epoch_times else None
        return self.model

    def test(self, setting, test=0, training_time=None):
        """Evaluate on the test split, print metrics and save the arrays.

        Args:
            setting: Run name; results go to ``./test_results/<setting>`` and,
                when ``test`` is truthy, weights are loaded from
                ``<self.args.checkpoints>/<setting>/checkpoint.pth`` first.
            test: Truthy to reload the saved checkpoint before evaluating.
            training_time: When not ``None``, written to ``runtime.txt`` in
                the results folder as ``average_epoch_time``.

        Returns:
            ``None``. Predictions and targets are saved as ``pred.npy`` and
            ``true.npy``.
        """
        _, test_loader = self._get_data('test')
        if test:
            print('Loading saved model checkpoint')
            ckpt_file = os.path.join(
                self.args.checkpoints, setting, 'checkpoint.pth')
            self.model.load_state_dict(self._load_state_dict(ckpt_file))

        pred_chunks = []
        true_chunks = []
        folder_path = os.path.join('./test_results', setting)
        os.makedirs(folder_path, exist_ok=True)

        self.model.eval()
        with torch.no_grad():
            for batch in test_loader:
                inputs, targets, input_marks = self._prepare_batch(batch)
                outputs, target = self._forward_model(
                    inputs, targets, input_marks)

                # Collect on the host as NumPy so everything can be stacked below.
                pred_chunks.append(outputs.detach().cpu().numpy())
                true_chunks.append(target.detach().cpu().numpy())

        preds = np.concatenate(pred_chunks, axis=0)
        trues = np.concatenate(true_chunks, axis=0)

        mae, mse, rmse, mape, mspe, rse, corr, nd, nrmse = metric(preds, trues)
        print('Test metrics:')
        print(f'MSE: {mse:.4f} | MAE: {mae:.4f} | RMSE: {rmse:.4f}')
        print(f'MAPE: {mape:.4f} | MSPE: {mspe:.4f} | RSE: {rse:.4f}')
        print(f'CORR: {corr:.4f} | ND: {nd:.4f} | NRMSE: {nrmse:.4f}')

        for file_name, array in (('pred.npy', preds), ('true.npy', trues)):
            np.save(os.path.join(folder_path, file_name), array)

        if training_time is not None:
            runtime_file = os.path.join(folder_path, 'runtime.txt')
            with open(runtime_file, 'w') as f:
                f.write(f'average_epoch_time: {training_time}\n')


# Second module-level name bound to the same class object.
# NOTE(review): presumably kept for callers that import the underscore
# spelling ``Exp_Main`` — confirm before removing.
Exp_Main = ExpMain
