import h5py
import numpy as np
import math
from scipy.integrate import quad
from scipy.stats import beta, binom
from scipy.optimize import minimize
import time
from multiprocessing import Pool, cpu_count


def _beta_neg_log_likelihood(params, data):
    """Return the negative log-likelihood of *data* under Beta(a, b) on [0, 1]."""
    a, b = params
    return -np.sum(beta.logpdf(data, a, b, loc=0, scale=1))


def _fit_beta_mle(samples):
    """Fit Beta(a, b) to *samples* in (0, 1) by maximum likelihood; return (a, b)."""
    mean_data = np.mean(samples)
    # Floor the variance: a constant column (variance 0) would otherwise make
    # the method-of-moments start divide by zero and hand the optimiser inf.
    var_data = max(np.var(samples), 1e-12)
    # Method-of-moments estimates, floored at the optimiser's lower bound,
    # serve as the starting point for the likelihood search.
    a_init = max(0.1, mean_data * (mean_data * (1 - mean_data) / var_data - 1))
    b_init = max(0.1, a_init * (1 - mean_data) / mean_data)
    result = minimize(_beta_neg_log_likelihood, [a_init, b_init], args=(samples,),
                      method='Nelder-Mead', bounds=[(0.1, None), (0.1, None)])
    return tuple(result.x)


def get_beta_params_recall(file_name, k):
    """Fit one Beta distribution per column to cosines of sampled distances.

    Reads the ``'distances'`` dataset from the HDF5 file *file_name*, draws a
    reproducible random sample of at most 1000 rows, takes the cosine of the
    first *k* entries of every sampled row, rescales the cosines from
    [-1, 1] to (0, 1) and fits a Beta(a, b) distribution to each of the *k*
    columns by maximum likelihood.

    NOTE(review): taking ``cos`` of the stored values presumes they are angles
    in radians, one row per query -- confirm against the file's producer.

    Args:
        file_name: Path of an HDF5 file containing a ``'distances'`` dataset.
        k: Number of leading columns to fit.

    Returns:
        A list of *k* ``(a, b)`` tuples, one per column, in column order.

    Raises:
        ValueError: If the dataset has no rows.
        IndexError: If the dataset has fewer than *k* columns.
    """
    with h5py.File(file_name, 'r') as f:
        dataset = f['distances'][:]

    row_count = len(dataset)
    if row_count == 0:
        raise ValueError("'distances' dataset is empty; nothing to sample")

    # A private, fixed-seed generator keeps the sample reproducible without
    # reseeding NumPy's global random state for the rest of the program.
    rng = np.random.RandomState(42)
    # Cap the sample at the number of rows so small files do not raise.
    sample_size = min(1000, row_count)
    selected_indices = rng.choice(row_count, size=sample_size, replace=False)
    sampled_dataset = dataset[selected_indices]

    # Explicit column indices (rather than a slice) raise IndexError when the
    # dataset has fewer than k columns instead of silently truncating.
    columns = sampled_dataset[:, np.arange(k)]
    # Compute in float64 regardless of the stored dtype; one row per column
    # of the dataset after the transpose.
    angular = np.cos(columns.astype(np.float64)).T

    beta_params = []
    for data in angular:
        # Map cosines from [-1, 1] to [0, 1] and keep them strictly inside
        # the open interval so the Beta log-density stays finite.
        scaled_data = np.clip((data + 1) / 2, 1e-6, 1 - 1e-6)
        beta_params.append(_fit_beta_mle(scaled_data))
    return beta_params

def integrand_recall_optimized(theta, M, d, beta_params):
    """Evaluate the recall integrand at angle *theta*.

    The value is the product of two factors:

    * the binomial CDF at *d* for *M* trials with success probability
      ``theta / pi``;
    * the density of *theta* implied by a Beta(a, b) distribution on
      ``(cos(theta) + 1) / 2``, i.e. the Beta pdf at that point multiplied
      by the change-of-variable factor ``sin(theta) / 2``.

    Args:
        theta: Angle at which to evaluate the integrand.
        M: Number of binomial trials.
        d: Threshold passed to the binomial CDF.
        beta_params: Pair ``(a, b)`` of Beta shape parameters.

    Returns:
        The product of the binomial CDF and the angle density.
    """
    shape_a, shape_b = beta_params

    tail_probability = binom.cdf(d, M, theta / math.pi)

    # Position of cos(theta) after rescaling [-1, 1] onto the Beta support.
    unit_position = (math.cos(theta) + 1) / 2
    half_density = beta.pdf(unit_position, shape_a, shape_b) / 2
    angle_density = half_density * math.sin(theta)

    return tail_probability * angle_density

def calculate_single_rho(args):
    """Integrate the recall integrand over [0, pi] for one parameter set.

    The inputs arrive packed in a single tuple so the function can be handed
    directly to ``Pool.map``, which passes one argument per task.

    Args:
        args: Tuple ``(m, d, params)`` forwarded to
            ``integrand_recall_optimized`` as ``M``, ``d`` and ``beta_params``.

    Returns:
        The value of the integral; the error estimate reported by ``quad``
        is discarded.
    """
    trial_count, threshold, shape_params = args
    quad_options = {"epsabs": 1e-12, "epsrel": 1e-12, "limit": 5000}
    value, _error_estimate = quad(
        integrand_recall_optimized,
        0,
        math.pi,
        args=(trial_count, threshold, shape_params),
        **quad_options,
    )
    return value

def get_recall_parallel(d, m, K, beta_params):
    """Average the per-column recall integrals, evaluating them in parallel.

    Builds one task per index ``0 .. K-1`` from ``beta_params`` and maps
    ``calculate_single_rho`` over the tasks in a process pool, then returns
    the mean of the resulting integrals.

    NOTE: a new process pool is created on every call. On platforms that
    start workers by spawning a fresh interpreter, the calling script must
    guard its entry point with ``if __name__ == "__main__":``.

    Args:
        d: Threshold forwarded to each integral.
        m: Number of binomial trials forwarded to each integral.
        K: Number of leading entries of *beta_params* to use.
        beta_params: Sequence of ``(a, b)`` Beta parameter pairs, indexable
            by ``0 .. K-1``.

    Returns:
        The mean of the *K* integrals (``nan`` when there are no tasks, as
        ``np.mean`` of an empty sequence).

    Raises:
        IndexError: If *beta_params* has fewer than *K* entries.
    """
    tasks = [(m, d, beta_params[k]) for k in range(K)]

    # Never start more workers than there are integrals to evaluate: idle
    # processes only add start-up cost. Pool requires at least one process.
    worker_count = max(1, min(cpu_count(), len(tasks)))

    with Pool(processes=worker_count) as pool:
        # pool.map preserves task order, so the mean is unaffected by the
        # number of workers.
        rho_values = pool.map(calculate_single_rho, tasks)

    return np.mean(rho_values)


