# Copyright 2022 Twitter, Inc and Zhendong Wang.
# SPDX-License-Identifier: Apache-2.0

import copy
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


from agents.helpers import (cosine_beta_schedule,
                            linear_beta_schedule,
                            vp_beta_schedule,
                            extract,
                            Losses)
from utils.utils import Progress, Silent


class Diffusion(nn.Module):
    """Denoising diffusion model that generates actions conditioned on a state.

    The wrapped ``model`` is called as ``model(x, t, state, condition=...)``.
    Depending on ``predict_epsilon`` its output is interpreted either as the
    noise contained in ``x`` or as the denoised action itself.  Sampling
    starts from Gaussian noise of shape ``(batch, action_dim)`` and applies
    ``n_timesteps`` reverse steps.

    The sampling methods are not wrapped in ``torch.no_grad()`` (the
    decorators are left commented out), so whether gradients are tracked
    through the sampling chain is up to the caller.

    Args:
        state_dim: Stored on the instance; not otherwise used by this class.
        action_dim: Size of the last dimension of the sampled actions.
        model: Denoiser network.  Must additionally provide
            ``forward_multiple_conditions(x, t, state, conditions=[...])``
            when ``using_guide`` or ``using_balance_loss`` is enabled.
        max_action: Actions (and, if ``clip_denoised``, the reconstructed
            start sample) are clamped to ``[-max_action, max_action]``.
        using_guide: Combine conditional and unconditional model outputs at
            sampling time, and pass ``condition`` to the model in training.
        prob_unconditional: Probability, per call to :meth:`loss`, that the
            condition is dropped for the whole batch.
        guide_weight: Scale applied to the guidance term when sampling.
        using_grad_guide: Subtract ``guide_weight * grad_fn(x)`` from the
            model output at sampling time.  Mutually exclusive with
            ``using_guide``; requires ``grad_fn`` and ``predict_epsilon``.
        grad_fn: Callable taking the current sample ``x``.
        using_balance_loss: Add the auxiliary balance term in training.
            Requires ``predict_epsilon`` and ``condition_sets``.
        condition_sets: Indexable collection of alternative conditions from
            which one is drawn per sample for the balance term.
        balance_loss_weight: Weight of the balance term in the total loss.
        beta_schedule: One of ``'linear'``, ``'cosine'`` or ``'vp'``.
        n_timesteps: Number of diffusion steps.
        loss_type: Key into ``Losses`` selecting the loss class.
        clip_denoised: Clamp the reconstructed start sample during sampling.
        predict_epsilon: Whether the model predicts noise (True) or the
            start sample directly (False).

    Raises:
        ValueError: If ``beta_schedule`` is not a supported name.
        AssertionError: If the guidance / balance-loss flags are inconsistent.
    """

    def __init__(self, state_dim, action_dim, model, max_action,
                 using_guide=False, prob_unconditional=0.1, guide_weight=4.,
                 using_grad_guide=False, grad_fn=None,
                 using_balance_loss=False, condition_sets=None, balance_loss_weight=1.,
                 beta_schedule='linear', n_timesteps=100,
                 loss_type='l2', clip_denoised=True, predict_epsilon=True):
        super(Diffusion, self).__init__()

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.max_action = max_action
        self.model = model

        if beta_schedule == 'linear':
            betas = linear_beta_schedule(n_timesteps)
        elif beta_schedule == 'cosine':
            betas = cosine_beta_schedule(n_timesteps)
        elif beta_schedule == 'vp':
            betas = vp_beta_schedule(n_timesteps)
        else:
            # Fail here with a clear message instead of an UnboundLocalError
            # on ``betas`` a few lines further down.
            raise ValueError(
                "unknown beta_schedule {!r}; expected 'linear', 'cosine' or 'vp'"
                .format(beta_schedule))

        alphas = 1. - betas
        alphas_cumprod = torch.cumprod(alphas, dim=0)
        # Cumulative product shifted by one step, with 1 for the step before t=0.
        alphas_cumprod_prev = torch.cat([torch.ones(1), alphas_cumprod[:-1]])

        self.n_timesteps = int(n_timesteps)
        self.clip_denoised = clip_denoised
        self.predict_epsilon = predict_epsilon

        self.using_guide = using_guide
        self.prob_unconditional = prob_unconditional
        self.guide_weight = guide_weight
        self.using_balance_loss = using_balance_loss
        self.condition_sets = condition_sets
        self.balance_loss_weight = balance_loss_weight
        if self.using_balance_loss:
            assert self.predict_epsilon
            assert self.condition_sets is not None

        self.using_grad_guide = using_grad_guide
        self.grad_fn = grad_fn
        # The two guidance modes are mutually exclusive.
        assert not self.using_guide or not self.using_grad_guide
        assert not self.using_grad_guide or (self.grad_fn is not None and self.predict_epsilon)

        self.register_buffer('betas', betas)
        self.register_buffer('alphas_cumprod', alphas_cumprod)
        self.register_buffer('alphas_cumprod_prev', alphas_cumprod_prev)

        # calculations for diffusion q(x_t | x_{t-1}) and others
        self.register_buffer('sqrt_alphas_cumprod', torch.sqrt(alphas_cumprod))
        self.register_buffer('sqrt_one_minus_alphas_cumprod', torch.sqrt(1. - alphas_cumprod))
        self.register_buffer('log_one_minus_alphas_cumprod', torch.log(1. - alphas_cumprod))
        self.register_buffer('sqrt_recip_alphas_cumprod', torch.sqrt(1. / alphas_cumprod))
        self.register_buffer('sqrt_recipm1_alphas_cumprod', torch.sqrt(1. / alphas_cumprod - 1))

        # calculations for posterior q(x_{t-1} | x_t, x_0)
        posterior_variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod)
        self.register_buffer('posterior_variance', posterior_variance)

        ## log calculation clipped because the posterior variance
        ## is 0 at the beginning of the diffusion chain
        self.register_buffer('posterior_log_variance_clipped',
                             torch.log(torch.clamp(posterior_variance, min=1e-20)))
        # torch.sqrt keeps these computations inside torch rather than
        # relying on NumPy ufuncs accepting torch tensors.
        self.register_buffer('posterior_mean_coef1',
                             betas * torch.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))
        self.register_buffer('posterior_mean_coef2',
                             (1. - alphas_cumprod_prev) * torch.sqrt(alphas) / (1. - alphas_cumprod))

        # ``Losses`` maps a loss name to a class; instantiate the selected one.
        self.loss_fn = Losses[loss_type]()

    # ------------------------------------------ sampling ------------------------------------------#

    def _predict_start_from_noise(self, x_t, t, noise):
        '''
            if self.predict_epsilon, model output is (scaled) noise;
            otherwise, model predicts x0 directly

            NOTE(review): ``extract`` comes from agents.helpers; it presumably
            picks the coefficient for each sample's timestep ``t`` and shapes
            it to broadcast against ``x_t`` -- confirm there.
        '''
        if not self.predict_epsilon:
            return noise
        return (
                extract(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t -
                extract(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise
        )

    def _q_posterior(self, x_start, x_t, t):
        '''
            Return (mean, variance, clipped log-variance) built from the
            precomputed posterior coefficients for timestep ``t``.
        '''
        posterior_mean = (
                extract(self.posterior_mean_coef1, t, x_t.shape) * x_start +
                extract(self.posterior_mean_coef2, t, x_t.shape) * x_t
        )
        posterior_variance = extract(self.posterior_variance, t, x_t.shape)
        posterior_log_variance_clipped = extract(self.posterior_log_variance_clipped, t, x_t.shape)
        return posterior_mean, posterior_variance, posterior_log_variance_clipped

    def _p_mean_variance(self, x, t, s, condition):
        '''
            Run the model at (x, t, s), apply the configured guidance and
            return the mean / variance / log-variance for the reverse step.
        '''
        if self.using_guide:
            # Conditional and unconditional outputs in one call, combined as
            # unconditional + weight * (conditional - unconditional).
            x_c, x_none = self.model.forward_multiple_conditions(x, t, s, conditions=[condition, None])
            noise = x_none + self.guide_weight * (x_c - x_none)
        elif self.using_grad_guide:
            noise = self.model(x, t, s, condition=None) - self.guide_weight * self.grad_fn(x)
        else:
            noise = self.model(x, t, s, condition=None)
        x_recon = self._predict_start_from_noise(x, t=t, noise=noise)

        # With clip_denoised=False the reconstruction is used unclamped.  The
        # original ``assert RuntimeError()`` in that branch asserted a truthy
        # object and therefore never fired, so this preserves its behaviour.
        if self.clip_denoised:
            x_recon.clamp_(-self.max_action, self.max_action)

        model_mean, posterior_variance, posterior_log_variance = self._q_posterior(x_start=x_recon, x_t=x, t=t)
        return model_mean, posterior_variance, posterior_log_variance

    # @torch.no_grad()
    def _p_sample(self, x, t, s, condition):
        '''
            One reverse step: posterior mean plus scaled Gaussian noise,
            with the noise term masked out for samples at t == 0.
        '''
        batch_size = x.shape[0]
        model_mean, _, model_log_variance = self._p_mean_variance(x=x, t=t, s=s, condition=condition)
        noise = torch.randn_like(x)
        # no noise when t == 0
        nonzero_mask = (1 - (t == 0).float()).reshape(batch_size, *((1,) * (len(x.shape) - 1)))
        return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise

    # @torch.no_grad()
    def _p_sample_loop(self, state, shape, condition, verbose=False, return_diffusion=False):
        '''
            Run the full reverse chain from Gaussian noise of ``shape``.

            Returns the final sample, or ``(final, chain)`` when
            ``return_diffusion`` is set, where ``chain`` stacks the initial
            noise and every intermediate sample along dim 1.
        '''
        device = self.betas.device

        batch_size = shape[0]
        x = torch.randn(shape, device=device)

        if return_diffusion:
            diffusion = [x]

        progress = Progress(self.n_timesteps) if verbose else Silent()
        for i in reversed(range(0, self.n_timesteps)):
            timesteps = torch.full((batch_size,), i, device=device, dtype=torch.long)
            x = self._p_sample(x, timesteps, state, condition=condition)

            progress.update({'t': i})

            if return_diffusion:
                diffusion.append(x)

        progress.close()

        if return_diffusion:
            return x, torch.stack(diffusion, dim=1)
        return x

    # @torch.no_grad()
    def sample(self, state, condition=None, *args, **kwargs):
        '''
            Sample one action per row of ``state``, clamped to
            [-max_action, max_action].

            ``condition`` is ignored unless ``using_guide`` is enabled.
            Extra arguments are forwarded to ``_p_sample_loop``; with
            ``return_diffusion=True`` the result is ``(action, chain)``.
        '''
        condition = condition if self.using_guide else None
        batch_size = state.shape[0]
        shape = (batch_size, self.action_dim)
        result = self._p_sample_loop(state, shape, condition, *args, **kwargs)
        if isinstance(result, tuple):
            # return_diffusion=True: clamp only the final action.  The chain
            # was built with torch.stack, so it is not touched by the
            # in-place clamp.
            action, diffusion = result
            return action.clamp_(-self.max_action, self.max_action), diffusion
        return result.clamp_(-self.max_action, self.max_action)

    def forward(self, state, condition=None, *args, **kwargs):
        '''Alias for :meth:`sample`, which also filters ``condition``.'''
        return self.sample(state, condition, *args, **kwargs)

    # ------------------------------------------ training ------------------------------------------#

    def _q_sample(self, x_start, t, noise=None):
        '''
            Noise ``x_start`` to timestep ``t``:
            sqrt(alphas_cumprod) * x_start + sqrt(1 - alphas_cumprod) * noise.
            Fresh Gaussian noise is drawn when ``noise`` is None.
        '''
        if noise is None:
            noise = torch.randn_like(x_start)

        return (
                extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
                extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise
        )

    def _p_losses(self, x_start, state, t, condition, weights=1.0):
        '''
            Denoising loss at timesteps ``t``, plus the optional balance term.

            The balance term is only active when a condition is given and
            ``using_balance_loss`` is set.  It compares the model output under
            the given condition with the output under a condition drawn at
            random from ``condition_sets`` for each sample.
        '''
        noise = torch.randn_like(x_start)

        x_noisy = self._q_sample(x_start=x_start, t=t, noise=noise)

        use_balance = condition is not None and self.using_balance_loss
        if use_balance:
            # One randomly chosen alternative condition per sample.
            picks = np.random.randint(0, len(self.condition_sets), x_start.shape[0])
            condition1 = [self.condition_sets[i] for i in picks]
            condition1 = torch.Tensor(condition1).float().to(x_start.device)
            x_recon, x_recon1 = self.model.forward_multiple_conditions(
                x_noisy, t, state, conditions=[condition, condition1])
        else:
            x_recon = self.model(x_noisy, t, state, condition=condition)

        assert noise.shape == x_recon.shape

        # The regression target depends on what the model is trained to predict.
        target = noise if self.predict_epsilon else x_start
        loss = self.loss_fn(x_recon, target, weights)

        if use_balance:
            # Larger timesteps get a larger weight.
            weight = t[:, None] / self.n_timesteps
            # Each output is pulled towards a detached copy of the other,
            # with the second direction down-weighted by 0.25.
            loss_reg = weight * F.mse_loss(x_recon, x_recon1.detach(), reduction='none')
            loss_com = weight * F.mse_loss(x_recon.detach(), x_recon1, reduction='none')

            loss = loss + self.balance_loss_weight * (loss_reg + 0.25 * loss_com).mean()

        return loss

    def loss(self, x, state, condition=None, weights=1.0):
        '''
            Training loss for a batch of actions ``x`` given ``state``.

            ``condition`` is used only when ``using_guide`` is enabled, and is
            then dropped for the whole batch with probability
            ``prob_unconditional``.  Timesteps are drawn uniformly from
            [0, n_timesteps) independently per sample.
        '''
        condition = condition if self.using_guide else None
        # The random draw happens on every call, even if condition is already None.
        condition = condition if np.random.rand() > self.prob_unconditional else None
        batch_size = len(x)
        t = torch.randint(0, self.n_timesteps, (batch_size,), device=x.device).long()
        return self._p_losses(x, state, t, condition, weights)

