from utils import *
import pandas as pd
import numpy as np
import warnings
from scipy.stats import norm
import matplotlib.pyplot as plt
import seaborn as sns
# Silence every warning for the remainder of the run.
warnings.filterwarnings("ignore")

# Input locations and the cell line to analyse, named once so that every
# call below refers to the same files.
FILE_SCORES = 'your_gene_data.csv'
CELL_LINE_INDEX = 5
FILE_CEG = 'CEGv2.txt'   # presumably the positive (essential) reference set -- confirm
FILE_NEG = 'NEGv1.txt'   # reference gene set

scores = load_single_cell_line(FILE_SCORES, cell_line_index=CELL_LINE_INDEX)
# Keep only the text before the first space of each index entry.
scores.index = [x.split(' ')[0] for x in scores.index]
p_vals = get_p_values(scores, neg_file_path=FILE_NEG)

# Attach reference labels, then keep only rows whose label is not -1
# (presumably -1 marks genes absent from both reference sets -- confirm).
df_final = add_true_labels(p_vals, FILE_CEG, FILE_NEG)
df_labeled = df_final[df_final['Label'] != -1].copy()
p_vals_labeled = df_labeled['P_Value'].values
true_labels = df_labeled['Label'].values

# Nominal level shared by all four procedures on the full labelled data set.
ALPHA_FULL = 0.1

# Run each procedure on the labelled p-values. The harmonic and SL
# procedures return a third value that is not used here.
reject_indices_harmonic, boundary_indices_harmonic, _ = harmonicpbfdr(
    p_vals_labeled, alpha=ALPHA_FULL
)
reject_indices_bh = BH(p_vals_labeled, q=ALPHA_FULL)
reject_indices_2domino, boundary_indices_2domino = bfdr_k_domino(
    p_vals_labeled, k=2, alpha=ALPHA_FULL
)
reject_indices_sl, boundary_indices_sl, _ = sl_procedure(
    p_vals_labeled, alpha=ALPHA_FULL
)

# Score every rejection set against the reference labels. Each call yields
# three values, stored as n_rej_*, prop_* and n_fd_* (exact definitions live
# in utils.evaluate_procedure).
n_rej_harmonic, prop_harmonic, n_fd_harmonic = evaluate_procedure(
    reject_indices_harmonic, p_vals_labeled, true_labels
)
n_rej_bh, prop_bh, n_fd_bh = evaluate_procedure(
    reject_indices_bh, p_vals_labeled, true_labels
)
n_rej_2domino, prop_2domino, n_fd_2domino = evaluate_procedure(
    reject_indices_2domino, p_vals_labeled, true_labels
)
n_rej_sl, prop_sl, n_fd_sl = evaluate_procedure(
    reject_indices_sl, p_vals_labeled, true_labels
)
# NOTE(review): k_values is not consumed by the loop below, which evaluates
# every k from 1 to MAX_BOUNDARY_K. It is kept unchanged in case later code
# uses it to select rows of df_results.
k_values = [1, 5, 10, 20, 35, 50]
MAX_BOUNDARY_K = 50

# Rejection sets to evaluate, keyed by the prefix used for the output
# columns. Insertion order fixes the column order of df_results.
rejections_by_procedure = {
    'Harmonic': reject_indices_harmonic,
    'BH': reject_indices_bh,
    '2Domino': reject_indices_2domino,
    'SL': reject_indices_sl,
}

results_list = []

# Report the range that is actually evaluated.
print(f"Evaluating boundary errors for k = 1..{MAX_BOUNDARY_K}...")

for k_eval in range(1, MAX_BOUNDARY_K + 1):
    row = {'k': k_eval}
    for prefix, rejected in rejections_by_procedure.items():
        # Only the first element of the helper's result is used; it is
        # stored as the error count and divided by k to give a rate.
        n_errors = evaluate_boundary_k_errors(
            rejected, p_vals_labeled, true_labels, k=k_eval
        )[0]
        row[f'{prefix}_Err'] = n_errors
        row[f'{prefix}_Rate'] = n_errors / k_eval
    results_list.append(row)

df_results = pd.DataFrame(results_list)
print(df_results)

def _label_is_null_at(labels, index):
    """Return 1 if ``labels[index]`` equals 0, otherwise 0.

    An index of -1 or lower yields 0 without looking at ``labels``
    (presumably the procedures return -1 when there is no boundary
    hypothesis -- confirm against utils).
    """
    if index > -1 and labels[index] == 0:
        return 1
    return 0


# p-value pools split by reference label (1 vs 0).
pool_pos_p = p_vals_labeled[true_labels == 1]
pool_neg_p = p_vals_labeled[true_labels == 0]

n_seeds = 100
alpha_list = [0.1, 0.2, 0.3]
replace_flag = False
n_sample = 600
all_results_records = []
for alpha in alpha_list:
    print(f"\n>>> Running for alpha = {alpha}")
    for seed in range(n_seeds):
        # Re-seeding per iteration means the same subsamples are drawn for
        # every alpha, so the levels are compared on identical data.
        np.random.seed(seed)
        sample_pos_p = np.random.choice(pool_pos_p, n_sample, replace=replace_flag)
        sample_pos_l = np.ones(n_sample)
        sample_neg_p = np.random.choice(pool_neg_p, n_sample, replace=replace_flag)
        sample_neg_l = np.zeros(n_sample)
        p_subset = np.concatenate([sample_pos_p, sample_neg_p])
        l_subset = np.concatenate([sample_pos_l, sample_neg_l])

        idx_har, boundary_indice_harmonic, _ = harmonicpbfdr(p_subset, alpha=alpha)
        idx_bh = BH(p_subset, q=alpha)
        idx_dom, boundary_indice_domino = bfdr_k_domino(p_subset, k=2, alpha=alpha)
        idx_dom3, boundary_indice_domino3 = bfdr_k_domino(p_subset, k=3, alpha=alpha)
        idx_sl, boundary_indice_sl, _ = sl_procedure(p_subset, alpha=alpha)

        is_boundary_false_harmonic = _label_is_null_at(l_subset, boundary_indice_harmonic)
        is_boundary_false_sl = _label_is_null_at(l_subset, boundary_indice_sl)

        # BH returns no boundary index, so the rejected hypothesis with the
        # largest p-value is used as its boundary.
        # len() is used (not truthiness) because idx_bh may be an array.
        is_boundary_false_bh = 0
        if len(idx_bh) > 0:
            marginal_idx_abs = idx_bh[np.argmax(p_subset[idx_bh])]
            if l_subset[marginal_idx_abs] == 0:
                is_boundary_false_bh = 1

        is_boundary_false_2domino = kbfdr_evaluate(l_subset, boundary_indice_domino, k=2)
        is_boundary_false_3domino = kbfdr_evaluate(l_subset, boundary_indice_domino3, k=3)

        # (column suffix, rejected indices, boundary-error flag).
        # The first three entries keep the existing column order; BH and SL
        # are appended so their results are recorded as well.
        per_procedure = (
            ("harmonic", idx_har, is_boundary_false_harmonic),
            ("2domino", idx_dom, is_boundary_false_2domino),
            ("3domino", idx_dom3, is_boundary_false_3domino),
            ("bh", idx_bh, is_boundary_false_bh),
            ("sl", idx_sl, is_boundary_false_sl),
        )

        record = {"alpha": alpha, "seed": seed}
        for suffix, rejected, boundary_error in per_procedure:
            n_rej, prop, n_fd = evaluate_procedure(rejected, p_subset, l_subset)
            record[f"n_rej_{suffix}"] = n_rej
            record[f"rejectionprop_{suffix}"] = prop
            record[f"n_fd_{suffix}"] = n_fd
            record[f"boundary_error_{suffix}"] = boundary_error
        all_results_records.append(record)
df_raw = pd.DataFrame(all_results_records)

# Average every metric over seeds, one row per alpha.
df_summary = df_raw.drop('seed', axis=1).groupby('alpha').mean().reset_index()

def _rejection_flags(p_values, threshold):
    """Return a list with 1 where a p-value is <= threshold and 0 elsewhere."""
    return [1 if p <= threshold else 0 for p in p_values]


# One small balanced subsample, tabulated per hypothesis.
n_demo = 100
# Take the last level of the grid explicitly rather than relying on the
# loop variable left over from the simulation loop.
alpha_demo = alpha_list[-1]

# NOTE(review): no seed is set here, so these draws continue from whatever
# state the global NumPy RNG is in at this point.
sample_pos_p = np.random.choice(pool_pos_p, n_demo, replace=replace_flag)
sample_pos_l = np.ones(n_demo)

sample_neg_p = np.random.choice(pool_neg_p, n_demo, replace=replace_flag)
sample_neg_l = np.zeros(n_demo)

p_subset = np.concatenate([sample_pos_p, sample_neg_p])
l_subset = np.concatenate([sample_pos_l, sample_neg_l])

idx_har, boundary_indice_harmonic, _ = harmonicpbfdr(p_subset, alpha=alpha_demo)
idx_bh = BH(p_subset, q=alpha_demo)
idx_dom, boundary_indice_domino = bfdr_k_domino(p_subset, k=2, alpha=alpha_demo)

# Convert each rejection set into a p-value cut-off via utils.get_threshold.
threshold_harmonic = get_threshold(p_subset, idx_har)
threshold_bh = get_threshold(p_subset, idx_bh)
threshold_domino = get_threshold(p_subset, idx_dom)

# One row per sampled hypothesis: p-value, label, and a 0/1 flag per
# procedure marking whether the p-value is at or below that cut-off.
results_df = pd.DataFrame({
    'P_Value': p_subset,
    'Label': l_subset,
    'Rejected_Harmonic': _rejection_flags(p_subset, threshold_harmonic),
    'Rejected_BH': _rejection_flags(p_subset, threshold_bh),
    'Rejected_Domino': _rejection_flags(p_subset, threshold_domino),
})