import numpy as np
from scipy.stats import norm, cauchy
import ray

def generate_data(dist_type, tau, n_samples, mu=0):
    """
    Generate a synthetic data stream of length ``n_samples`` from the
    specified distribution and return both the data and the true τ-quantile.

    Parameters
    ----------
    dist_type : {'normal', 'uniform', 'cauchy'}
        Distribution family to draw from.
    tau : float
        Target quantile level (0 < τ < 1).
    n_samples : int
        Number of observations to generate.
    mu : float, default=0
        Location shift for the distribution.

    Returns
    -------
    data : ndarray
        Array of shape (n_samples,) containing the generated samples.
    true_q : float
        Ground-truth τ-quantile of the population distribution.

    Raises
    ------
    ValueError
        If ``tau`` does not lie strictly between 0 and 1, or if
        ``dist_type`` is not one of the supported families.
    """
    # Validate the level up front: outside [0, 1] the ppf-based branches
    # return NaN and the uniform branch returns a point outside the support,
    # which would silently corrupt every downstream error/coverage metric.
    # The array form also rejects NaN, since both comparisons are then False.
    tau_values = np.asarray(tau)
    if not np.all((tau_values > 0) & (tau_values < 1)):
        raise ValueError(f"tau must lie strictly between 0 and 1, got {tau!r}.")

    if dist_type == 'normal':
        # N(μ, 1)
        data = np.random.normal(mu, 1, n_samples)
        true_q = mu + norm.ppf(tau)  # shift the standard-normal quantile
    elif dist_type == 'uniform':
        # U[μ−1, μ+1] so that the mean equals μ
        low, high = mu - 1, mu + 1
        data = np.random.uniform(low, high, n_samples)
        true_q = low + (high - low) * tau  # linear interpolation across the support
    elif dist_type == 'cauchy':
        # Cauchy(μ, 1): unit-scale standard Cauchy shifted by μ
        data = np.random.standard_cauchy(size=n_samples) + mu
        true_q = mu + cauchy.ppf(tau)  # shift the standard-Cauchy quantile
    else:
        raise ValueError("Unsupported distribution type.")
    return data, true_q



def distribute_data(data, n_clients):
    """
    Split ``data`` into ``n_clients`` contiguous chunks of (nearly) equal size.

    When ``len(data)`` is divisible by ``n_clients`` every chunk has the same
    length. Otherwise the first ``len(data) % n_clients`` chunks receive one
    extra observation instead of the call failing.

    Parameters
    ----------
    data : array_like
        Observations to distribute; split along the first axis.
    n_clients : int
        Number of chunks to produce.

    Returns
    -------
    list of ndarray
        ``n_clients`` consecutive pieces of ``data``, in original order.

    Raises
    ------
    ValueError
        If ``n_clients`` is not positive.
    """
    # array_split matches np.split for evenly divisible input and also
    # handles a remainder, which np.split rejects with an error.
    return np.array_split(data, n_clients)

def package_results(raw_results):
    """
    Transpose per-run result tuples into a dictionary of NumPy arrays.

    Parameters
    ----------
    raw_results : iterable of tuple
        One ``(true_q, est, var, mae)`` tuple per simulation run.

    Returns
    -------
    dict
        Keys ``'estimates'``, ``'variances'``, ``'maes'`` and ``'true_q'``,
        each holding an array with one entry per run.
    """
    # Turn the list of rows into four columns, one per tuple field.
    truth_col, est_col, var_col, mae_col = zip(*raw_results)

    # Pair each output key with its column; 'maes' is the final MAE of each run.
    keyed_columns = (
        ('estimates', est_col),
        ('variances', var_col),
        ('maes', mae_col),
        ('true_q', truth_col),
    )
    return {key: np.array(column) for key, column in keyed_columns}

def analyze_results(results, z_score=6.74735):
    """
    Summarise simulation runs by empirical coverage probability (ECP) and
    mean absolute error (MAE).

    Parameters
    ----------
    results : dict
        Output of :func:`package_results`; the ``'estimates'``,
        ``'variances'`` and ``'true_q'`` entries are used.
    z_score : float, default=6.74735
        Critical value multiplying the standard error to form the interval
        (e.g. Z_{1−α/2} or custom quantile).

    Returns
    -------
    dict
        ``'coverage'``: fraction of runs whose interval contains the true
        quantile; ``'mae'``: mean of ``|estimate − true quantile|``.
    """
    point = results['estimates']
    variance = results['variances']
    truth = results['true_q']

    # Symmetric interval: estimate ± z_score * standard error.
    half_width = z_score * np.sqrt(variance)
    above_lower = truth >= point - half_width
    below_upper = truth <= point + half_width
    covered = above_lower & below_upper

    abs_error = np.abs(point - truth)
    return {'coverage': np.mean(covered), 'mae': np.mean(abs_error)}