import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import DataLoader
from torch import optim
from torch.utils.tensorboard import SummaryWriter
from syne_tune import Reporter
import logging
import math

from benchmarking.utils.experiment import Experiment
from .model import ConvAE
from ...train_utils.utils import EarlyStopper, save_checkpoint
from models.data_utils.util import get_device


class ConvAETrainer:
    """Trainer for the ConvAE.

    Runs an MSE-reconstruction training loop with Adam, logs per-step and
    per-epoch losses to TensorBoard, and optionally reports to syne-tune,
    records losses on an ``Experiment``, writes checkpoints and stops early.
    """

    def __init__(self, model: ConvAE, print_loss_every: int = 50) -> None:
        """Store the model and set up logging state.

        Args:
            model: The autoencoder to train. It is moved to CUDA here when
                ``torch.cuda.is_available()`` is true.
            print_loss_every: Log the batch loss every this many optimizer
                steps (counted across epochs and across ``train`` calls).
        """
        self.use_cuda = torch.cuda.is_available()
        self.model = model
        self.print_loss_every = print_loss_every
        self.num_steps = 0
        self.writer = SummaryWriter()

        if self.use_cuda:
            self.model.cuda()

    def train(
        self,
        train_loader: DataLoader,
        test_loader: DataLoader,
        epochs: int = 30,
        lr: float = 1e-4,
        hyperparametertuning: bool = False,
        experiment: Experiment = None,
        early_stopping: bool = True,
        patience: int = 30,
        min_delta: float = 1.02,
        checkpointing: bool = True,
        device_id: int = None,
    ) -> None:
        """Train the model and evaluate it on the test loader after each epoch.

        Args:
            train_loader: Loader yielding training batches; each batch is
                viewed as ``(batch_size, 1, model.input_dim)``.
            test_loader: Loader yielding evaluation batches (same layout).
            epochs: Maximum number of epochs to run.
            lr: Learning rate passed to Adam.
            hyperparametertuning: If true, report the average test loss of
                every epoch through a syne-tune ``Reporter``.
            experiment: If truthy, the average train/test losses of every
                epoch are appended to ``experiment.train_loss_mse`` and
                ``experiment.test_loss_mse``.
            early_stopping: If true, an ``EarlyStopper`` is consulted after
                each epoch and may end training.
            patience: Forwarded to ``EarlyStopper``.
            min_delta: Forwarded to ``EarlyStopper``.
            checkpointing: If true, ``save_checkpoint`` is called after each
                epoch.
            device_id: Forwarded to ``get_device`` and ``save_checkpoint``.

        Raises:
            ValueError: If one of the loaders yields no batches.
            AssertionError: If ``hyperparametertuning`` is set and the test
                loss is NaN.
        """
        self.model.train()
        device = get_device(device_id)
        self.model.to(device)
        optimizer = optim.Adam(self.model.parameters(), lr=lr)
        # The criterion is stateless, so build it once instead of per batch.
        criterion = nn.MSELoss()

        # Performance reporting to syne-tune
        report = Reporter() if hyperparametertuning else None

        early_stopper = (
            EarlyStopper(patience=patience, min_delta=min_delta)
            if early_stopping
            else None
        )

        try:
            for epoch in range(epochs):
                train_loss = self._run_train_epoch(
                    train_loader, optimizer, criterion, device
                )
                self.writer.add_scalar(
                    "[Train] Avg. epoch loss", train_loss, epoch + 1
                )

                # Now get test loss
                test_loss = self._evaluate(test_loader, criterion, device)

                # Report the test loss to syne tune
                if hyperparametertuning:
                    assert not np.isnan(test_loss), "Loss cannot be NaN"
                    report(step=epoch, mean_loss=test_loss, epoch=epoch + 1)

                # Report to the received experiment instance in model comparison
                if experiment:
                    experiment.train_loss_mse.append(train_loss)
                    experiment.test_loss_mse.append(test_loss)

                logging.info(
                    f"Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss:.8f}, "
                    f"Test Loss: {test_loss:.8f}\n"
                )

                # Average test loss over all test batches of this epoch.
                self.writer.add_scalar(
                    "[Test] Avg. epoch loss", test_loss, epoch + 1
                )

                if checkpointing:
                    save_checkpoint(
                        model=self.model,
                        experiment=experiment,
                        device_id=device_id,
                        epoch=epoch,
                        best=(
                            early_stopper.is_best(test_loss)
                            if early_stopping
                            else None
                        ),
                    )

                if early_stopping and early_stopper.early_stop(test_loss):
                    logging.info("Training was stopped early.")
                    break
        finally:
            # Flush and close the tensorboard writer even if training raised,
            # so that already-logged scalars are not lost.
            self.writer.flush()
            self.writer.close()

    def _prepare_batch(self, batch, device):
        """Move ``batch`` to ``device`` and view it as (batch_size, 1, input_dim)."""
        # Always follow the model's device. Gating this on CUDA availability
        # would leave the batch behind whenever get_device picks a non-CPU
        # device that torch.cuda.is_available() does not report.
        batch = batch.to(device)
        return batch.view(len(batch), 1, self.model.input_dim)

    def _run_train_epoch(self, train_loader, optimizer, criterion, device) -> float:
        """Run one optimisation pass over ``train_loader``; return the mean batch loss."""
        self.model.train()
        total_loss = 0.0
        num_batches = 0

        for batch in train_loader:
            batch = self._prepare_batch(batch, device)

            optimizer.zero_grad()
            recon, _ = self.model(batch)
            loss = criterion(recon, batch)
            loss.backward()
            optimizer.step()

            # Accumulate a plain float: summing the loss tensor itself would
            # keep every batch's autograd graph alive for the whole epoch.
            loss_value = loss.item()
            total_loss += loss_value
            num_batches += 1
            self.num_steps += 1

            # Print loss if step is eligible
            if self.num_steps % self.print_loss_every == 0:
                logging.info(f"Step {self.num_steps} loss: {loss_value:.8f}")

            # Submit loss to tensor board
            self.writer.add_scalar("[Train] Total loss", loss_value, self.num_steps)

        # Batches are counted while iterating rather than derived from
        # len(dataset) / batch_size, which is wrong with drop_last=True and
        # undefined when the loader uses a batch_sampler.
        if num_batches == 0:
            raise ValueError("train_loader did not yield any batches")
        return total_loss / num_batches

    def _evaluate(self, test_loader, criterion, device) -> float:
        """Return the mean batch loss over ``test_loader`` without updating the model."""
        # Evaluate in eval mode so dropout is disabled and BatchNorm running
        # statistics are not updated from test data; the previous mode is
        # restored afterwards so callers observe the same state as before.
        was_training = self.model.training
        self.model.eval()
        total_loss = 0.0
        num_batches = 0

        try:
            with torch.no_grad():
                for batch in test_loader:
                    batch = self._prepare_batch(batch, device)
                    recon, _ = self.model(batch)
                    total_loss += criterion(recon, batch).item()
                    num_batches += 1
        finally:
            self.model.train(was_training)

        if num_batches == 0:
            raise ValueError("test_loader did not yield any batches")
        return total_loss / num_batches
