import numpy as np
from scipy.stats import norm


def introduce_missing_data(original_data, miss_rate):
    """Introduce missing data (NaN) completely at random (MCAR).

    Each entry is masked independently with probability ``miss_rate``.
    If an observation happens to have all of its features masked, its mask
    is drawn again so that every row keeps at least one observed value.

    Args:
        - original_data: original data, shape (n, d)
        - miss_rate: missing rate, expected in [0, 1)
    Returns:
        - miss_data: copy of the original data with introduced NaN, shape (n, d).
          Integer/boolean input is returned as float64 because NaN cannot be
          stored in those dtypes.
    Raises:
        - ValueError: if miss_rate >= 1 (every row would be fully masked, so
          the per-row rejection step could never terminate)
    """
    n, d = original_data.shape

    # NaN is only representable in inexact dtypes: promote integer/bool data.
    if (np.issubdtype(original_data.dtype, np.integer)
            or np.issubdtype(original_data.dtype, np.bool_)):
        miss_data = original_data.astype(np.float64)
    else:
        miss_data = np.copy(original_data)

    # Nothing to mask; also avoids the vacuous "all features masked" case
    # that arises when there are no features at all.
    if n == 0 or d == 0:
        return miss_data

    if miss_rate >= 1.0:
        raise ValueError(
            "miss_rate must be < 1 so that each observation keeps at least "
            "one feature, got {}".format(miss_rate)
        )

    miss_mask = np.zeros((n, d), dtype=bool)
    for i in range(n):
        # Rows are drawn one at a time (and redrawn when fully masked) so the
        # sequence of random numbers consumed is the same for a given seed.
        current_mask = np.random.uniform(low=0.0, high=1.0, size=d) < miss_rate
        while current_mask.all():
            current_mask = np.random.uniform(low=0.0, high=1.0, size=d) < miss_rate
        miss_mask[i] = current_mask

    miss_data[miss_mask] = np.nan
    return miss_data


def normalization(data, parameters=None):
    """Normalize data column-wise to (approximately) the range [0, 1].

    Each column is shifted by its minimum and divided by its range plus a
    small constant (1e-6) that guards against division by zero for constant
    columns. NaN entries are ignored when computing the statistics and stay
    NaN in the output.

    Args:
        - data: original data, shape (n, d)
        - parameters: dict with keys "min_val" and "max_val" to reuse; if None,
          they are computed from `data` (min/max normalization)
    Returns:
        - norm_data: normalized data, shape (n, d). Floating-point input keeps
          its dtype; any other dtype is converted to float64 so the result is
          not truncated.
        - norm_parameters: dict with "min_val" (per-column minimum) and
          "max_val" (per-column maximum *after* subtracting the minimum, i.e.
          the column range), each of shape (d,)
    """
    _, dim = data.shape  # enforces 2-D input

    # Work on a floating-point copy: writing the scaled values back into an
    # integer array would silently truncate them.
    if np.issubdtype(data.dtype, np.floating):
        norm_data = data.copy()
    else:
        norm_data = data.astype(np.float64)

    if parameters is None:
        # Min/max normalization, computed for all columns at once
        min_val = np.asarray(np.nanmin(norm_data, axis=0), dtype=np.float64)
        norm_data -= min_val
        max_val = np.asarray(np.nanmax(norm_data, axis=0), dtype=np.float64)
        norm_data /= max_val + 1e-6

        # Return norm_parameters for renormalization
        norm_parameters = {"min_val": min_val, "max_val": max_val}
    else:
        min_val = np.asarray(parameters["min_val"], dtype=np.float64)
        max_val = np.asarray(parameters["max_val"], dtype=np.float64)

        norm_data -= min_val
        norm_data /= max_val + 1e-6

        norm_parameters = parameters

    return norm_data, norm_parameters


def renormalization(norm_data, norm_parameters):
    """Map normalized data back to its original range.

    Inverts the column-wise scaling: every column is multiplied by its stored
    "max_val" (plus the 1e-6 constant) and then shifted by its stored
    "min_val".

    Args:
        - norm_data: normalized data, shape (n, d)
        - norm_parameters: dict holding "min_val" and "max_val" for each column
    Returns:
        - renorm_data: data in the original range, shape (n, d); the input
          array is left untouched
    """
    _, n_cols = norm_data.shape
    offsets = norm_parameters["min_val"]
    scales = norm_parameters["max_val"]

    restored = norm_data.copy()
    for col in range(n_cols):
        column = restored[:, col]  # view into `restored`, updated in place
        # Undo the division first, then the shift
        column[...] = column * (scales[col] + 1e-6)
        column[...] = column + offsets[col]

    return restored


def compute_normalised_rmse(norm_original_data, norm_miss_data, norm_imputed_data):
    """Compute the RMSE of the imputed entries on normalized data.

    The error is measured only at the positions that are NaN in
    `norm_miss_data`, so that the sum of squared errors and the count used to
    average it refer to the same set of entries.

    Args:
        - norm_original_data: shape (n, d)
        - norm_miss_data: shape (n, d), NaN marks the missing entries
        - norm_imputed_data: shape (n, d)
    Returns:
        - norm_rmse: value of the normalized RMSE; NaN if `norm_miss_data`
          contains no missing entry (the RMSE is then undefined)
    """
    miss_mask = np.isnan(norm_miss_data)
    nb_miss = np.sum(miss_mask)
    if nb_miss == 0:
        # No imputed entry to evaluate: avoid a 0/0 division warning.
        return np.nan

    errors = (np.asarray(norm_original_data)[miss_mask]
              - np.asarray(norm_imputed_data)[miss_mask])
    norm_rmse = np.sqrt(np.sum(errors ** 2) / nb_miss)
    return norm_rmse


def log_likelihood_model1(sample):
    """Compute the sample log-likelihood under generative model 1.

    The value returned for each row is
    log[Phi((1 - x1) / sigma1) - Phi(-x1 / sigma1)] + log N(x2; x1, sigma2^2)
    with sigma1 = 0.05 and sigma2 = 0.1, where x1 and x2 are the first two
    columns of `sample`.

    Args:
        - sample: complete sample in the original range, shape (n, d), d >= 2
    Returns:
        - loglik: log-likelihood for each individual sample, shape (n,)
    """
    sigma1 = 0.05
    sigma2 = 0.1
    x1 = sample[:, 0]
    x2 = sample[:, 1]

    # Use Gaussian CDF to compute likelihood from Mollified Uniform distribution
    lower = -x1 / sigma1
    upper = (1.0 - x1) / sigma1
    # When both bounds lie in the upper tail, cdf(upper) - cdf(lower) cancels
    # to exactly 0 in floating point; the equivalent difference of survival
    # functions keeps the precision there.
    p1 = np.where(
        lower > 0.0,
        norm.sf(lower) - norm.sf(upper),
        norm.cdf(upper) - norm.cdf(lower),
    )

    # logpdf avoids the underflow of log(pdf) when x2 is far from x1
    loglik = np.log(p1) + norm.logpdf(x2, loc=x1, scale=sigma2)
    return loglik


def log_likelihood_model2(sample):
    """Compute the sample log-likelihood under generative model 2.

    With u = x1 / (4 * pi), the value returned for each row is
    log[Phi((1 - u) / sigma1) - Phi(-u / sigma1)] + log N(x2; sin(x1), sigma2^2)
    with sigma1 = 0.05 and sigma2 = 0.2, where x1 and x2 are the first two
    columns of `sample`.

    NOTE(review): no 1 / (4 * pi) change-of-variables factor is applied to the
    first term -- confirm this matches the intended generative model.

    Args:
        - sample: complete sample in the original range, shape (n, d), d >= 2
    Returns:
        - loglik: log-likelihood for each individual sample, shape (n,)
    """
    sigma1 = 0.05
    sigma2 = 0.2
    x1 = sample[:, 0]
    x2 = sample[:, 1]

    # Use Gaussian CDF to compute likelihood from Mollified Uniform distribution
    u = x1 / (4.0 * np.pi)
    lower = -u / sigma1
    upper = (1.0 - u) / sigma1
    # When both bounds lie in the upper tail, cdf(upper) - cdf(lower) cancels
    # to exactly 0 in floating point; the equivalent difference of survival
    # functions keeps the precision there.
    p1 = np.where(
        lower > 0.0,
        norm.sf(lower) - norm.sf(upper),
        norm.cdf(upper) - norm.cdf(lower),
    )

    # logpdf avoids the underflow of log(pdf) when x2 is far from sin(x1)
    loglik = np.log(p1) + norm.logpdf(x2, loc=np.sin(x1), scale=sigma2)
    return loglik


def log_likelihood_model3(sample):
    """Compute the sample log-likelihood under generative model 3.

    The value returned for each row is
    log N(sqrt(x1^2 + x2^2) - R; 0, sigma^2) - log(2 * pi)
    with R = 1.0 and sigma = 0.1, where x1 and x2 are the first two columns
    of `sample`.

    NOTE(review): no Jacobian term for the radial change of variables is
    applied -- confirm this matches the intended generative model.

    Args:
        - sample: complete sample in the original range, shape (n, d), d >= 2
    Returns:
        - loglik: log-likelihood for each individual sample, shape (n,)
    """
    R = 1.0
    sigma = 0.1
    x1 = sample[:, 0]
    x2 = sample[:, 1]

    radius = np.sqrt(x1**2 + x2**2)
    # Work in log space: log(pdf / c) underflows to -inf far from the ring,
    # whereas logpdf - log(c) stays finite.
    loglik = norm.logpdf(radius - R, loc=0.0, scale=sigma) - np.log(2.0 * np.pi)
    return loglik


def log_likelihood_model4(sample):
    """Compute the sample log-likelihood under generative model 4.

    The value returned for each row is
    log N(sqrt(x1^2 + x2^2 + x3^2) - R; 0, sigma^2) - log(2 * pi) - log(pi)
    with R = 1.0 and sigma = 0.1, where x1, x2 and x3 are the first three
    columns of `sample`.

    NOTE(review): no Jacobian term for the spherical change of variables is
    applied -- confirm this matches the intended generative model.

    Args:
        - sample: complete sample in the original range, shape (n, d), d >= 3
    Returns:
        - loglik: log-likelihood for each individual sample, shape (n,)
    """
    R = 1.0
    sigma = 0.1
    x1 = sample[:, 0]
    x2 = sample[:, 1]
    x3 = sample[:, 2]

    radius = np.sqrt(x1**2 + x2**2 + x3**2)
    # Work in log space: log(pdf / c) underflows to -inf far from the sphere,
    # whereas logpdf - log(c) stays finite.
    loglik = (norm.logpdf(radius - R, loc=0.0, scale=sigma)
              - np.log(2.0 * np.pi) - np.log(np.pi))
    return loglik







