# Code modified from https://github.com/pytorchbearer/torchbearer

import torch
import torch.nn.functional as F
from torch.distributions.beta import Beta
from fmix import sample_mask, FMixBase

import torchbearer
from torchbearer.callbacks import Callback
from torchbearer import metrics as m

import torch.nn.functional as F
import torchbearer
from torchbearer.callbacks import Callback
from fmix import sample_mask, FMixBase
import torch


from torchbearer.metrics import default as d
from utils.reformulated_mixup_all import MixupAcc, MixupAcc_lam

def fmix_loss(input, y, index, lam, train=True, reformulate=False, bce_loss=False, lam_train=False):
    r"""Criterion for fmix

    Args:
        input: Model output. If ``lam_train`` is true this must be a pair
            ``(predictions, lam_estimate)``; otherwise it is the predictions alone.
            If train, predictions for the mixed input. If not train, for the standard input.
        y: Targets for first image
        index: Permutation for mixing (only read when ``train`` and not ``reformulate``)
        lam: Lambda value of mixing (only read when ``train`` and not ``reformulate``)
        train: If true (and ``reformulate`` is false), sum the loss of the predictions with
            ``y`` and ``y[index]``, weighted by lam/(1-lam). If false, plain loss with ``y``
        reformulate: If true, always return the plain loss with ``y``
        bce_loss: If true, use ``F.binary_cross_entropy_with_logits`` instead of
            ``F.cross_entropy``
        lam_train: If true, additionally regress the second element of ``input`` onto
            ``lam`` with an L1 loss; the result is
            ``0.8 * mixed_loss + 0.2 * l1_loss``

    Returns:
        The loss value produced by the selected loss function(s).
    """
    loss_fn = F.binary_cross_entropy_with_logits if bce_loss else F.cross_entropy

    # With lam_train the model emits an extra head estimating lambda.
    if lam_train:
        logits, lam_pred = input
    else:
        logits, lam_pred = input, None

    # Evaluation and the reformulated variant use the unmixed targets only.
    if not train or reformulate:
        return loss_fn(logits, y)

    mixed = loss_fn(logits, y) * lam + loss_fn(logits, y[index]) * (1 - lam)
    if not lam_train:
        return mixed

    # The 0.8 weight must scale the *whole* mixed classification loss. Previously
    # operator precedence applied it only to the permuted-target term, so the two
    # weights (0.8 / 0.2) did not form the intended convex combination.
    lam_target = lam * torch.ones_like(lam_pred)
    return 0.8 * mixed + 0.2 * F.l1_loss(lam_pred, lam_target)

class FMix_100(FMixBase, Callback):
    r""" FMix augmentation

        Args:
            other_generator (iterable): Source of ``(images, _)`` batches used as the second
                image when ``reformulate`` is True. Not read otherwise.
            decay_power (float): Decay power for frequency decay prop 1/f**d
            alpha (float): Alpha value for beta distribution from which to sample mean of mask
            size ([int] | [int, int] | [int, int, int]): Shape of desired mask, list up to 3 dims. -1 computes on the fly
            max_soft (float): Softening value between 0 and 0.5 which smooths hard edges in the mask.
            reformulate (bool): If True, uses the reformulation of [1].
            fout (bool): If True (and ``reformulate`` is True), the masked-out region is filled
                with zeros instead of the batch drawn from ``other_generator``.
            lam_train (bool): Forwarded to :func:`fmix_loss`; also selects ``MixupAcc_lam``
                instead of ``MixupAcc`` as the metric registered for this loss.

        Example
        -------

        .. code-block:: python

            fmix = FMix_100(...)
            trial = Trial(model, optimiser, fmix.loss(), callbacks=[fmix])
            # ...
    """
    def __init__(self, other_generator=None, decay_power=3, alpha=1, size=(32, 32), max_soft=0.0, reformulate=False,fout=False, lam_train=False):
        super().__init__(decay_power, alpha, size, max_soft, reformulate)
        self.other_generator = other_generator
        self.iterator = None
        self.fout = fout
        self.reformulate = reformulate
        self.lam_train = lam_train

    def on_start_epoch(self, state):
        """Restart iteration over ``other_generator`` at the start of every epoch."""
        if self.reformulate:
            self.iterator = iter(self.other_generator)

    def on_sample(self, state):
        """Mix the sampled batch in place and record lambda / permutation in ``state``."""
        super().on_sample(state)
        device = state[torchbearer.DEVICE]

        # Replace the batch with its mixed version; targets are left untouched.
        state[torchbearer.X] = self(state[torchbearer.X])

        # Set mixup flags (lambda is pinned to 1 for the reformulated variant)
        lam_value = 1. if self.reformulate else self.lam
        state[torchbearer.MIXUP_LAMBDA] = torch.tensor([lam_value], device=device)
        state[torchbearer.MIXUP_PERMUTATION] = self.index

        # Register the metric for this loss by the closure's name. Use this instance
        # rather than building a throwaway default FMix_100() on every batch.
        metric = MixupAcc_lam if self.lam_train else MixupAcc
        d.__loss_map__[self.loss().__name__] = metric

    def __call__(self, x):
        """Sample a mask and return the masked combination of ``x`` with a second batch.

        Side effects: stores the sampled permutation in ``self.index`` and the sampled
        lambda in ``self.lam``.
        """
        # A size entry of -1 means "take this dimension from x" (skipping the batch axis).
        size = [x.shape[i + 1] if s == -1 else s for i, s in enumerate(self.size)]

        lam, mask = sample_mask(self.alpha, self.decay_power, size, self.max_soft, self.reformulate)
        index = torch.randperm(x.size(0)).to(x.device)
        mask = torch.from_numpy(mask).float().to(x.device)

        # Mix the images
        x1 = mask * x
        if self.reformulate:
            # Allow use outside a Trial, where on_start_epoch has not run yet.
            if self.iterator is None:
                self.iterator = iter(self.other_generator)
            other, _ = next(self.iterator)
            other = other[:(x.shape[0])]
            if other.shape != x.shape:
                # NOTE(review): fixed 2-pixel pad on the last two dims; presumably
                # bridges a specific dataset size gap - confirm against the caller.
                other = F.pad(other, (2, 2, 2, 2))
            if self.fout:
                other = torch.zeros_like(x)
            x2 = (1 - mask) * other.to(x.device)
        else:
            x2 = (1 - mask) * x[index]
        self.index = index
        self.lam = lam
        return x1 + x2

    def loss(self, use_bce=False):
        """Return a torchbearer criterion (a function of ``state``) wrapping ``fmix_loss``.

        Args:
            use_bce (bool): Passed to ``fmix_loss`` as ``bce_loss``.
        """
        def _fmix_loss(state):
            y_pred = state[torchbearer.Y_PRED]
            y = state[torchbearer.Y_TRUE]
            # Mixing information is absent when the callback did not run on this batch.
            index = state[torchbearer.MIXUP_PERMUTATION] if torchbearer.MIXUP_PERMUTATION in state else None
            lam = state[torchbearer.MIXUP_LAMBDA] if torchbearer.MIXUP_LAMBDA in state else None
            train = state[torchbearer.MODEL].training
            return fmix_loss(y_pred, y, index, lam, train, self.reformulate, use_bce, self.lam_train)

        return _fmix_loss


# class PointNetFMix(FMix):
#     def __init__(self, resolution, decay_power=3, alpha=1, max_soft=0.0, reformulate=False):
#         super().__init__(decay_power, alpha, [resolution, resolution, resolution], max_soft, reformulate)
#         self.res = resolution
#
#     def __call__(self, x):
#         import kaolin.conversions as cvt
#         x = super().__call__(x)
#         t = []
#         for i in range(x.shape[0]):
#             t.append(cvt.voxelgrid_to_pointcloud(x[i], self.res, normalize=True))
#         return torch.stack(t)
