from sklearn.metrics import silhouette_score
import numpy as np

def _bootstrap_silhouette_ci(emb, labels, n_bootstrap, alpha, rng):
    """Return ``(lower, upper)`` percentile bounds of bootstrapped silhouette scores.

    Returns ``None`` when no resample produced at least two distinct labels
    (or when ``n_bootstrap`` is 0), so the caller can choose a fallback.
    """
    n_samples = len(emb)
    scores = []
    for _ in range(n_bootstrap):
        # Resample rows with replacement, keeping each row paired with its label.
        indices = rng.choice(n_samples, size=n_samples, replace=True)
        resampled_labels = labels[indices]

        # silhouette_score needs at least two distinct labels; skip
        # degenerate resamples instead of letting it raise.
        if len(np.unique(resampled_labels)) > 1:
            scores.append(silhouette_score(emb[indices], resampled_labels))

    if not scores:
        return None
    lower = np.percentile(scores, 100 * alpha / 2)
    upper = np.percentile(scores, 100 * (1 - alpha / 2))
    return lower, upper


def runUMAPClustering(embs, int_labels, npz_file, *, n_bootstrap=1000,
                      confidence_level=0.95, random_state=None) -> None:
    """Score each embedding with the silhouette coefficient and save the results.

    For every entry of ``embs`` the silhouette score against ``int_labels`` is
    computed with ``silhouette_score`` (default settings), together with a
    percentile-bootstrap confidence interval. The three resulting 1-D arrays
    are written to ``npz_file`` under the keys ``coeffs``,
    ``coeffs_ci_lower`` and ``coeffs_ci_upper`` (one value per embedding).

    Note: despite its name, this function does not run UMAP itself; it scores
    whatever embeddings it is given.

    Args:
        embs: Iterable of embeddings, one per layer. Each is converted with
            ``np.asarray`` and must have one row per sample.
        int_labels: Cluster label for each sample, shared by all embeddings.
            Any array-like is accepted (list, tuple, ndarray).
        npz_file: Destination passed straight to ``np.savez``.
        n_bootstrap: Number of bootstrap resamples per embedding.
        confidence_level: Two-sided confidence level, strictly between 0 and 1.
        random_state: ``None`` keeps the legacy behaviour of drawing from
            NumPy's global random state (so ``np.random.seed`` still applies).
            An int seed or a ``np.random.RandomState`` instance makes the
            bootstrap reproducible independently of the global state.

    Raises:
        ValueError: If ``confidence_level`` is not in (0, 1) or
            ``n_bootstrap`` is negative. Errors raised by
            ``silhouette_score`` on the full data are not caught here.
    """
    if not 0.0 < confidence_level < 1.0:
        raise ValueError(
            f"confidence_level must be in (0, 1), got {confidence_level!r}"
        )
    if n_bootstrap < 0:
        raise ValueError(f"n_bootstrap must be >= 0, got {n_bootstrap!r}")

    if random_state is None:
        # The np.random module exposes the same ``choice`` API as RandomState
        # and draws from the global stream, exactly as before.
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # Integer-array indexing below requires ndarrays; plain lists would raise.
    labels = np.asarray(int_labels)
    alpha = 1 - confidence_level

    coeffs = []
    coeffs_ci_lower = []
    coeffs_ci_upper = []

    for emb in embs:
        emb = np.asarray(emb)

        # Point estimate on the full, un-resampled data.
        score = silhouette_score(emb, labels)
        coeffs.append(score)

        interval = _bootstrap_silhouette_ci(emb, labels, n_bootstrap, alpha, rng)
        if interval is None:
            # No usable resample: collapse the interval onto the point estimate.
            interval = (score, score)
        coeffs_ci_lower.append(interval[0])
        coeffs_ci_upper.append(interval[1])

    np.savez(
        npz_file,
        coeffs=np.array(coeffs),
        coeffs_ci_lower=np.array(coeffs_ci_lower),
        coeffs_ci_upper=np.array(coeffs_ci_upper),
    )
    print(f"Saved UMAP clustering data → {npz_file}")