import numpy as np


def cv_reward_func(state, next_state):
    mean = np.mean(state)
    std = np.std(state)
    before_cv = (std/mean) * 100

    mean = np.mean(next_state)
    std = np.std(next_state)
    after_cv = (std/mean) * 100

    cv_reward = before_cv - after_cv
    return cv_reward


def reward_func(state, next_state, coef_cv, cv=False):
    reward = np.mean(next_state) - np.mean(state)
    if cv:
        cv_reward = cv_reward_func(state, next_state)
        reward = reward + coef_cv * cv_reward

    return reward



import torch
import torch.nn as nn
import torch.nn.functional as F

import math


class SinusoidalPosEmb(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        device = x.device
        half_dim = self.dim // 2
        emb = math.log(10000) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, device=device) * -emb)
        emb = x[:, None] * emb[None, :]
        emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
        return emb


def cosine_beta_schedule(timesteps, s=0.008, dtype=torch.float32):
    """
    cosine schedule
    as proposed in https://openreview.net/forum?id=-NEXDKk8gZ
    """
    steps = timesteps + 1
    x = np.linspace(0, steps, steps)
    alphas_cumprod = np.cos(((x / steps) + s) / (1 + s) * np.pi * 0.5) ** 2
    alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
    betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1])
    betas_clipped = np.clip(betas, a_min=0, a_max=0.999)
    return torch.tensor(betas_clipped, dtype=dtype)


def linear_beta_schedule(timesteps, beta_start=1e-4, beta_end=2e-2, dtype=torch.float32):
    betas = np.linspace(
        beta_start, beta_end, timesteps
    )
    return torch.tensor(betas, dtype=dtype)


def vp_beta_schedule(timesteps, dtype=torch.float32):
    t = np.arange(1, timesteps + 1)
    T = timesteps
    b_max = 10.
    b_min = 0.1
    alpha = np.exp(-b_min / T - 0.5 * (b_max - b_min) * (2 * t - 1) / T ** 2)
    betas = 1 - alpha
    return torch.tensor(betas, dtype=dtype)


class WeightedLoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, pred, targ, weights=1.0):
        """

        :param pred: [batch_size * action_dim]
        :param targ: [batch_size * action_dim]
        :param weights:
        :return:
        """
        loss = self._loss(pred, targ)
        weighted_loss = (loss*weights).mean()
        return weighted_loss


class WeightedL1(WeightedLoss):
    def _loss(self, pred, targ):
        return torch.abs(pred - targ)


class WeightedL2(WeightedLoss):
    def _loss(self, pred, targ):
        return nn.MSELoss(pred, targ)


Losses = {
    'l1': WeightedL1,
    'l2': WeightedL2
}


def extract(a, t, x_shape):
    b, *_ = t.shape
    out = a.gather(-1, t)
    out = out.reshape(b, *((1,) * (len(x_shape) - 1)))
    return out


class Silent:
    def __init__(self, *args, **kwargs):
        pass

    def __getattr__(self, attr):
        return lambda *args: None


class EarlyStopping(object):
    def __init__(self, tolerance=5, min_delta=0):
        self.tolerance = tolerance
        self.min_delta = min_delta
        self.counter = 0
        self.early_stop = False

    def __call__(self, train_loss, validation_loss):
        if (validation_loss-train_loss) > self.min_delta:
            self.counter += 1
            if self.counter >= self.tolerance:
                return True
        else:
            self.counter = 0
            return False


def reward_fn(state, next_state):
    # rwd = 0
    # for i in range(len(state)):
    #     rwd += next_state[i] - state[i]
    # print("!!!!!!!!!! state", state)
    # print("!!!!!!!!!! next_state", next_state)
    reward = np.mean(next_state) - torch.mean(state)
    return reward